Home | History | Annotate | Download | only in thirdparty
      1 #!/usr/bin/python
      2 # pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
      4 #
      5 # Permission is hereby granted, free of charge, to any person
      6 # obtaining a copy of this software and associated documentation files
      7 # (the "Software"), to deal in the Software without restriction,
      8 # including without limitation the rights to use, copy, modify, merge,
      9 # publish, distribute, sublicense, and/or sell copies of the Software,
     10 # and to permit persons to whom the Software is furnished to do so,
     11 # subject to the following conditions:
     12 #
     13 # The above copyright notice and this permission notice shall be
     14 # included in all copies or substantial portions of the Software.
     15 #
     16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
     19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
     20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
     21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23 # SOFTWARE.
     24 
     25 """
     26 Check Python source code formatting, according to PEP 8:
     27 http://www.python.org/dev/peps/pep-0008/
     28 
     29 For usage and a list of options, try this:
     30 $ python pep8.py -h
     31 
     32 This program and its regression test suite live here:
     33 http://github.com/jcrocholl/pep8
     34 
     35 Groups of errors and warnings:
     36 E errors
     37 W warnings
     38 100 indentation
     39 200 whitespace
     40 300 blank lines
     41 400 imports
     42 500 line length
     43 600 deprecation
     44 700 statements
     45 
     46 You can add checks to this program by writing plugins. Each plugin is
     47 a simple function that is called for each line of source code, either
     48 physical or logical.
     49 
     50 Physical line:
     51 - Raw line of text from the input file.
     52 
     53 Logical line:
     54 - Multi-line statements converted to a single line.
     55 - Stripped left and right.
     56 - Contents of strings replaced with 'xxx' of same length.
     57 - Comments removed.
     58 
     59 The check function requests physical or logical lines by the name of
     60 the first argument:
     61 
     62 def maximum_line_length(physical_line)
     63 def extraneous_whitespace(logical_line)
     64 def blank_lines(logical_line, blank_lines, indent_level, line_number)
     65 
     66 The last example above demonstrates how check plugins can request
     67 additional information with extra arguments. All attributes of the
     68 Checker object are available. Some examples:
     69 
     70 lines: a list of the raw lines from the input file
     71 tokens: the tokens that contribute to this logical line
     72 line_number: line number in the input file
     73 blank_lines: blank lines before this one
     74 indent_char: first indentation character in this file (' ' or '\t')
     75 indent_level: indentation (with tabs expanded to multiples of 8)
     76 previous_indent_level: indentation on previous line
     77 previous_logical: previous logical line
     78 
     79 The docstring of each check function shall be the relevant part of
     80 text from PEP 8. It is printed if the user enables --show-pep8.
     81 Several docstrings contain examples directly from the PEP 8 document.
     82 
     83 Okay: spam(ham[1], {eggs: 2})
     84 E201: spam( ham[1], {eggs: 2})
     85 
     86 These examples are verified automatically when pep8.py is run with the
     87 --doctest option. You can add examples for your own check functions.
     88 The format is simple: "Okay" or error/warning code followed by colon
     89 and space, the rest of the line is example source code. If you put 'r'
     90 before the docstring, you can use \n for newline, \t for tab and \s
     91 for space.
     92 
     93 """
     94 
     95 __version__ = '0.5.0'
     96 
     97 import os
     98 import sys
     99 import re
    100 import time
    101 import inspect
    102 import tokenize
    103 from optparse import OptionParser
    104 from keyword import iskeyword
    105 from fnmatch import fnmatch
    106 
# Comma-separated names of version-control metadata directories.
# NOTE(review): presumably the default for an "exclude" option; the
# consumer is not visible in this part of the file.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
# Error-code prefixes ignored by default.  The E24x checks
# (whitespace_around_comma) are documented as disabled by default.
DEFAULT_IGNORE = ['E24']

# Captures the leading run of spaces and tabs of a physical line.
INDENT_REGEX = re.compile(r'([ \t]*)')
# Matches the old "raise Name, ..." form; group 1 is the comma.
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
# Matches a self-test example line: "Okay: <code>" or "<code id>: <code>".
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
# Matches a single error/warning code such as E201 or W291.
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
# Previous logical lines matching this (class/def headers, string
# literals) exempt the following nested definition from E301.
E301NOT_REGEX = re.compile(r'class |def |u?r?["\']')

# Characters treated as horizontal whitespace by the checks.
WHITESPACE = ' \t'

# Operators that are always binary.
BINARY_OPERATORS = ['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '>=', '<=', '>>=', '<<=',
    '%',  '^',  '&',  '|',  '=',  '/',  '//',  '>',  '<',  '>>',  '<<']
# Operators that may be unary (or argument unpacking) depending on context.
UNARY_OPERATORS = ['**', '*', '+', '-']
# Scan order matters: whitespace_around_operator reports the first hit.
OPERATORS = BINARY_OPERATORS + UNARY_OPERATORS

# Module-level state, None at import time.  Checker reads
# options.counters and options.physical_checks; they are populated
# elsewhere (not visible in this part of the file).
options = None
args = None
    126 
    127 
    128 ##############################################################################
    129 # Plugins (check functions) for physical lines
    130 ##############################################################################
    131 
    132 
    133 def tabs_or_spaces(physical_line, indent_char):
    134     r"""
    135     Never mix tabs and spaces.
    136 
    137     The most popular way of indenting Python is with spaces only.  The
    138     second-most popular way is with tabs only.  Code indented with a mixture
    139     of tabs and spaces should be converted to using spaces exclusively.  When
    140     invoking the Python command line interpreter with the -t option, it issues
    141     warnings about code that illegally mixes tabs and spaces.  When using -tt
    142     these warnings become errors.  These options are highly recommended!
    143 
    144     Okay: if a == 0:\n        a = 1\n        b = 1
    145     E101: if a == 0:\n        a = 1\n\tb = 1
    146     """
    147     indent = INDENT_REGEX.match(physical_line).group(1)
    148     for offset, char in enumerate(indent):
    149         if char != indent_char:
    150             return offset, "E101 indentation contains mixed spaces and tabs"
    151 
    152 
    153 def tabs_obsolete(physical_line):
    154     r"""
    155     For new projects, spaces-only are strongly recommended over tabs.  Most
    156     editors have features that make this easy to do.
    157 
    158     Okay: if True:\n    return
    159     W191: if True:\n\treturn
    160     """
    161     indent = INDENT_REGEX.match(physical_line).group(1)
    162     if indent.count('\t'):
    163         return indent.index('\t'), "W191 indentation contains tabs"
    164 
    165 
    166 def trailing_whitespace(physical_line):
    167     """
    168     JCR: Trailing whitespace is superfluous.
    169 
    170     Okay: spam(1)
    171     W291: spam(1)\s
    172     """
    173     physical_line = physical_line.rstrip('\n')    # chr(10), newline
    174     physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    175     physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    176     stripped = physical_line.rstrip()
    177     if physical_line != stripped:
    178         return len(stripped), "W291 trailing whitespace"
    179 
    180 
    181 def trailing_blank_lines(physical_line, lines, line_number):
    182     r"""
    183     JCR: Trailing blank lines are superfluous.
    184 
    185     Okay: spam(1)
    186     W391: spam(1)\n
    187     """
    188     if physical_line.strip() == '' and line_number == len(lines):
    189         return 0, "W391 blank line at end of file"
    190 
    191 
    192 def missing_newline(physical_line):
    193     """
    194     JCR: The last line should have a newline.
    195     """
    196     if physical_line.rstrip() == physical_line:
    197         return len(physical_line), "W292 no newline at end of file"
    198 
    199 
    200 def maximum_line_length(physical_line):
    201     """
    202     Limit all lines to a maximum of 79 characters.
    203 
    204     There are still many devices around that are limited to 80 character
    205     lines; plus, limiting windows to 80 characters makes it possible to have
    206     several windows side-by-side.  The default wrapping on such devices looks
    207     ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    208     For flowing long blocks of text (docstrings or comments), limiting the
    209     length to 72 characters is recommended.
    210     """
    211     length = len(physical_line.rstrip())
    212     if length > 79:
    213         return 79, "E501 line too long (%d characters)" % length
    214 
    215 
    216 ##############################################################################
    217 # Plugins (check functions) for logical lines
    218 ##############################################################################
    219 
    220 
    221 def blank_lines(logical_line, blank_lines, indent_level, line_number,
    222                 previous_logical, blank_lines_before_comment):
    223     r"""
    224     Separate top-level function and class definitions with two blank lines.
    225 
    226     Method definitions inside a class are separated by a single blank line.
    227 
    228     Extra blank lines may be used (sparingly) to separate groups of related
    229     functions.  Blank lines may be omitted between a bunch of related
    230     one-liners (e.g. a set of dummy implementations).
    231 
    232     Use blank lines in functions, sparingly, to indicate logical sections.
    233 
    234     Okay: def a():\n    pass\n\n\ndef b():\n    pass
    235     Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
    236 
    237     E301: class Foo:\n    b = 0\n    def bar():\n        pass
    238     E302: def a():\n    pass\n\ndef b(n):\n    pass
    239     E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    240     E303: def a():\n\n\n\n    pass
    241     E304: @decorator\n\ndef a():\n    pass
    242     """
    243     if line_number == 1:
    244         return  # Don't expect blank lines before the first line
    245     max_blank_lines = max(blank_lines, blank_lines_before_comment)
    246     if previous_logical.startswith('@'):
    247         if max_blank_lines:
    248             return 0, "E304 blank lines found after function decorator"
    249     elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
    250         return 0, "E303 too many blank lines (%d)" % max_blank_lines
    251     elif (logical_line.startswith('def ') or
    252           logical_line.startswith('class ') or
    253           logical_line.startswith('@')):
    254         if indent_level:
    255             if not (max_blank_lines or E301NOT_REGEX.match(previous_logical)):
    256                 return 0, "E301 expected 1 blank line, found 0"
    257         elif max_blank_lines != 2:
    258             return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
    259 
    260 
    261 def extraneous_whitespace(logical_line):
    262     """
    263     Avoid extraneous whitespace in the following situations:
    264 
    265     - Immediately inside parentheses, brackets or braces.
    266 
    267     - Immediately before a comma, semicolon, or colon.
    268 
    269     Okay: spam(ham[1], {eggs: 2})
    270     E201: spam( ham[1], {eggs: 2})
    271     E201: spam(ham[ 1], {eggs: 2})
    272     E201: spam(ham[1], { eggs: 2})
    273     E202: spam(ham[1], {eggs: 2} )
    274     E202: spam(ham[1 ], {eggs: 2})
    275     E202: spam(ham[1], {eggs: 2 })
    276 
    277     E203: if x == 4: print x, y; x, y = y , x
    278     E203: if x == 4: print x, y ; x, y = y, x
    279     E203: if x == 4 : print x, y; x, y = y, x
    280     """
    281     line = logical_line
    282     for char in '([{':
    283         found = line.find(char + ' ')
    284         if found > -1:
    285             return found + 1, "E201 whitespace after '%s'" % char
    286     for char in '}])':
    287         found = line.find(' ' + char)
    288         if found > -1 and line[found - 1] != ',':
    289             return found, "E202 whitespace before '%s'" % char
    290     for char in ',;:':
    291         found = line.find(' ' + char)
    292         if found > -1:
    293             return found, "E203 whitespace before '%s'" % char
    294 
    295 
    296 def missing_whitespace(logical_line):
    297     """
    298     JCR: Each comma, semicolon or colon should be followed by whitespace.
    299 
    300     Okay: [a, b]
    301     Okay: (3,)
    302     Okay: a[1:4]
    303     Okay: a[:4]
    304     Okay: a[1:]
    305     Okay: a[1:4:2]
    306     E231: ['a','b']
    307     E231: foo(bar,baz)
    308     """
    309     line = logical_line
    310     for index in range(len(line) - 1):
    311         char = line[index]
    312         if char in ',;:' and line[index + 1] not in WHITESPACE:
    313             before = line[:index]
    314             if char == ':' and before.count('[') > before.count(']'):
    315                 continue  # Slice syntax, no space required
    316             if char == ',' and line[index + 1] == ')':
    317                 continue  # Allow tuple with only one element: (3,)
    318             return index, "E231 missing whitespace after '%s'" % char
    319 
    320 
    321 def indentation(logical_line, previous_logical, indent_char,
    322                 indent_level, previous_indent_level):
    323     r"""
    324     Use 4 spaces per indentation level.
    325 
    326     For really old code that you don't want to mess up, you can continue to
    327     use 8-space tabs.
    328 
    329     Okay: a = 1
    330     Okay: if a == 0:\n    a = 1
    331     E111:   a = 1
    332 
    333     Okay: for item in items:\n    pass
    334     E112: for item in items:\npass
    335 
    336     Okay: a = 1\nb = 2
    337     E113: a = 1\n    b = 2
    338     """
    339     if indent_char == ' ' and indent_level % 4:
    340         return 0, "E111 indentation is not a multiple of four"
    341     indent_expect = previous_logical.endswith(':')
    342     if indent_expect and indent_level <= previous_indent_level:
    343         return 0, "E112 expected an indented block"
    344     if indent_level > previous_indent_level and not indent_expect:
    345         return 0, "E113 unexpected indentation"
    346 
    347 
    348 def whitespace_before_parameters(logical_line, tokens):
    349     """
    350     Avoid extraneous whitespace in the following situations:
    351 
    352     - Immediately before the open parenthesis that starts the argument
    353       list of a function call.
    354 
    355     - Immediately before the open parenthesis that starts an indexing or
    356       slicing.
    357 
    358     Okay: spam(1)
    359     E211: spam (1)
    360 
    361     Okay: dict['key'] = list[index]
    362     E211: dict ['key'] = list[index]
    363     E211: dict['key'] = list [index]
    364     """
    365     prev_type = tokens[0][0]
    366     prev_text = tokens[0][1]
    367     prev_end = tokens[0][3]
    368     for index in range(1, len(tokens)):
    369         token_type, text, start, end, line = tokens[index]
    370         if (token_type == tokenize.OP and
    371             text in '([' and
    372             start != prev_end and
    373             prev_type == tokenize.NAME and
    374             (index < 2 or tokens[index - 2][1] != 'class') and
    375             (not iskeyword(prev_text))):
    376             return prev_end, "E211 whitespace before '%s'" % text
    377         prev_type = token_type
    378         prev_text = text
    379         prev_end = end
    380 
    381 
    382 def whitespace_around_operator(logical_line):
    383     """
    384     Avoid extraneous whitespace in the following situations:
    385 
    386     - More than one space around an assignment (or other) operator to
    387       align it with another.
    388 
    389     Okay: a = 12 + 3
    390     E221: a = 4  + 5
    391     E222: a = 4 +  5
    392     E223: a = 4\t+ 5
    393     E224: a = 4 +\t5
    394     """
    395     line = logical_line
    396     for operator in OPERATORS:
    397         found = line.find('  ' + operator)
    398         if found > -1:
    399             return found, "E221 multiple spaces before operator"
    400         found = line.find(operator + '  ')
    401         if found > -1:
    402             return found, "E222 multiple spaces after operator"
    403         found = line.find('\t' + operator)
    404         if found > -1:
    405             return found, "E223 tab before operator"
    406         found = line.find(operator + '\t')
    407         if found > -1:
    408             return found, "E224 tab after operator"
    409 
    410 
    411 def missing_whitespace_around_operator(logical_line, tokens):
    412     r"""
    413     - Always surround these binary operators with a single space on
    414       either side: assignment (=), augmented assignment (+=, -= etc.),
    415       comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
    416       Booleans (and, or, not).
    417 
    418     - Use spaces around arithmetic operators.
    419 
    420     Okay: i = i + 1
    421     Okay: submitted += 1
    422     Okay: x = x * 2 - 1
    423     Okay: hypot2 = x * x + y * y
    424     Okay: c = (a + b) * (a - b)
    425     Okay: foo(bar, key='word', *args, **kwargs)
    426     Okay: baz(**kwargs)
    427     Okay: negative = -1
    428     Okay: spam(-1)
    429     Okay: alpha[:-i]
    430     Okay: if not -5 < x < +5:\n    pass
    431     Okay: lambda *args, **kw: (args, kw)
    432 
    433     E225: i=i+1
    434     E225: submitted +=1
    435     E225: x = x*2 - 1
    436     E225: hypot2 = x*x + y*y
    437     E225: c = (a+b) * (a-b)
    438     E225: c = alpha -4
    439     E225: z = x **y
    440     """
    441     parens = 0
    442     need_space = False
    443     prev_type = tokenize.OP
    444     prev_text = prev_end = None
    445     for token_type, text, start, end, line in tokens:
    446         if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
    447             # ERRORTOKEN is triggered by backticks in Python 3000
    448             continue
    449         if text in ('(', 'lambda'):
    450             parens += 1
    451         elif text == ')':
    452             parens -= 1
    453         if need_space:
    454             if start == prev_end:
    455                 return prev_end, "E225 missing whitespace around operator"
    456             need_space = False
    457         elif token_type == tokenize.OP:
    458             if text == '=' and parens:
    459                 # Allow keyword args or defaults: foo(bar=None).
    460                 pass
    461             elif text in BINARY_OPERATORS:
    462                 need_space = True
    463             elif text in UNARY_OPERATORS:
    464                 if ((prev_type != tokenize.OP or prev_text in '}])') and not
    465                     (prev_type == tokenize.NAME and iskeyword(prev_text))):
    466                     # Allow unary operators: -123, -x, +1.
    467                     # Allow argument unpacking: foo(*args, **kwargs).
    468                     need_space = True
    469             if need_space and start == prev_end:
    470                 return prev_end, "E225 missing whitespace around operator"
    471         prev_type = token_type
    472         prev_text = text
    473         prev_end = end
    474 
    475 
    476 def whitespace_around_comma(logical_line):
    477     """
    478     Avoid extraneous whitespace in the following situations:
    479 
    480     - More than one space around an assignment (or other) operator to
    481       align it with another.
    482 
    483     JCR: This should also be applied around comma etc.
    484     Note: these checks are disabled by default
    485 
    486     Okay: a = (1, 2)
    487     E241: a = (1,  2)
    488     E242: a = (1,\t2)
    489     """
    490     line = logical_line
    491     for separator in ',;:':
    492         found = line.find(separator + '  ')
    493         if found > -1:
    494             return found + 1, "E241 multiple spaces after '%s'" % separator
    495         found = line.find(separator + '\t')
    496         if found > -1:
    497             return found + 1, "E242 tab after '%s'" % separator
    498 
    499 
    500 def whitespace_around_named_parameter_equals(logical_line):
    501     """
    502     Don't use spaces around the '=' sign when used to indicate a
    503     keyword argument or a default parameter value.
    504 
    505     Okay: def complex(real, imag=0.0):
    506     Okay: return magic(r=real, i=imag)
    507     Okay: boolean(a == b)
    508     Okay: boolean(a != b)
    509     Okay: boolean(a <= b)
    510     Okay: boolean(a >= b)
    511 
    512     E251: def complex(real, imag = 0.0):
    513     E251: return magic(r = real, i = imag)
    514     """
    515     parens = 0
    516     window = '   '
    517     equal_ok = ['==', '!=', '<=', '>=']
    518 
    519     for pos, c in enumerate(logical_line):
    520         window = window[1:] + c
    521         if parens:
    522             if window[0] in WHITESPACE and window[1] == '=':
    523                 if window[1:] not in equal_ok:
    524                     issue = "E251 no spaces around keyword / parameter equals"
    525                     return pos, issue
    526             if window[2] in WHITESPACE and window[1] == '=':
    527                 if window[:2] not in equal_ok:
    528                     issue = "E251 no spaces around keyword / parameter equals"
    529                     return pos, issue
    530         if c == '(':
    531             parens += 1
    532         elif c == ')':
    533             parens -= 1
    534 
    535 
    536 def whitespace_before_inline_comment(logical_line, tokens):
    537     """
    538     Separate inline comments by at least two spaces.
    539 
    540     An inline comment is a comment on the same line as a statement.  Inline
    541     comments should be separated by at least two spaces from the statement.
    542     They should start with a # and a single space.
    543 
    544     Okay: x = x + 1  # Increment x
    545     Okay: x = x + 1    # Increment x
    546     E261: x = x + 1 # Increment x
    547     E262: x = x + 1  #Increment x
    548     E262: x = x + 1  #  Increment x
    549     """
    550     prev_end = (0, 0)
    551     for token_type, text, start, end, line in tokens:
    552         if token_type == tokenize.NL:
    553             continue
    554         if token_type == tokenize.COMMENT:
    555             if not line[:start[1]].strip():
    556                 continue
    557             if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
    558                 return (prev_end,
    559                         "E261 at least two spaces before inline comment")
    560             if (len(text) > 1 and text.startswith('#  ')
    561                            or not text.startswith('# ')):
    562                 return start, "E262 inline comment should start with '# '"
    563         else:
    564             prev_end = end
    565 
    566 
    567 def imports_on_separate_lines(logical_line):
    568     r"""
    569     Imports should usually be on separate lines.
    570 
    571     Okay: import os\nimport sys
    572     E401: import sys, os
    573 
    574     Okay: from subprocess import Popen, PIPE
    575     Okay: from myclas import MyClass
    576     Okay: from foo.bar.yourclass import YourClass
    577     Okay: import myclass
    578     Okay: import foo.bar.yourclass
    579     """
    580     line = logical_line
    581     if line.startswith('import '):
    582         found = line.find(',')
    583         if found > -1:
    584             return found, "E401 multiple imports on one line"
    585 
    586 
    587 def compound_statements(logical_line):
    588     r"""
    589     Compound statements (multiple statements on the same line) are
    590     generally discouraged.
    591 
    592     While sometimes it's okay to put an if/for/while with a small body
    593     on the same line, never do this for multi-clause statements. Also
    594     avoid folding such long lines!
    595 
    596     Okay: if foo == 'blah':\n    do_blah_thing()
    597     Okay: do_one()
    598     Okay: do_two()
    599     Okay: do_three()
    600 
    601     E701: if foo == 'blah': do_blah_thing()
    602     E701: for x in lst: total += x
    603     E701: while t < 10: t = delay()
    604     E701: if foo == 'blah': do_blah_thing()
    605     E701: else: do_non_blah_thing()
    606     E701: try: something()
    607     E701: finally: cleanup()
    608     E701: if foo == 'blah': one(); two(); three()
    609 
    610     E702: do_one(); do_two(); do_three()
    611     """
    612     line = logical_line
    613     found = line.find(':')
    614     if -1 < found < len(line) - 1:
    615         before = line[:found]
    616         if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
    617             before.count('[') <= before.count(']') and  # [1:2] (slice)
    618             not re.search(r'\blambda\b', before)):      # lambda x: x
    619             return found, "E701 multiple statements on one line (colon)"
    620     found = line.find(';')
    621     if -1 < found:
    622         return found, "E702 multiple statements on one line (semicolon)"
    623 
    624 
    625 def python_3000_has_key(logical_line):
    626     """
    627     The {}.has_key() method will be removed in the future version of
    628     Python. Use the 'in' operation instead, like:
    629     d = {"a": 1, "b": 2}
    630     if "b" in d:
    631         print d["b"]
    632     """
    633     pos = logical_line.find('.has_key(')
    634     if pos > -1:
    635         return pos, "W601 .has_key() is deprecated, use 'in'"
    636 
    637 
    638 def python_3000_raise_comma(logical_line):
    639     """
    640     When raising an exception, use "raise ValueError('message')"
    641     instead of the older form "raise ValueError, 'message'".
    642 
    643     The paren-using form is preferred because when the exception arguments
    644     are long or include string formatting, you don't need to use line
    645     continuation characters thanks to the containing parentheses.  The older
    646     form will be removed in Python 3000.
    647     """
    648     match = RAISE_COMMA_REGEX.match(logical_line)
    649     if match:
    650         return match.start(1), "W602 deprecated form of raising exception"
    651 
    652 
    653 def python_3000_not_equal(logical_line):
    654     """
    655     != can also be written <>, but this is an obsolete usage kept for
    656     backwards compatibility only. New code should always use !=.
    657     The older syntax is removed in Python 3000.
    658     """
    659     pos = logical_line.find('<>')
    660     if pos > -1:
    661         return pos, "W603 '<>' is deprecated, use '!='"
    662 
    663 
    664 def python_3000_backticks(logical_line):
    665     """
    666     Backticks are removed in Python 3000.
    667     Use repr() instead.
    668     """
    669     pos = logical_line.find('`')
    670     if pos > -1:
    671         return pos, "W604 backticks are deprecated, use 'repr()'"
    672 
    673 
    674 ##############################################################################
    675 # Helper functions
    676 ##############################################################################
    677 
    678 
    679 def expand_indent(line):
    680     """
    681     Return the amount of indentation.
    682     Tabs are expanded to the next multiple of 8.
    683 
    684     >>> expand_indent('    ')
    685     4
    686     >>> expand_indent('\\t')
    687     8
    688     >>> expand_indent('    \\t')
    689     8
    690     >>> expand_indent('       \\t')
    691     8
    692     >>> expand_indent('        \\t')
    693     16
    694     """
    695     result = 0
    696     for char in line:
    697         if char == '\t':
    698             result = result // 8 * 8 + 8
    699         elif char == ' ':
    700             result += 1
    701         else:
    702             break
    703     return result
    704 
    705 
    706 def mute_string(text):
    707     """
    708     Replace contents with 'xxx' to prevent syntax matching.
    709 
    710     >>> mute_string('"abc"')
    711     '"xxx"'
    712     >>> mute_string("'''abc'''")
    713     "'''xxx'''"
    714     >>> mute_string("r'abc'")
    715     "r'xxx'"
    716     """
    717     start = 1
    718     end = len(text) - 1
    719     # String modifiers (e.g. u or r)
    720     if text.endswith('"'):
    721         start += text.index('"')
    722     elif text.endswith("'"):
    723         start += text.index("'")
    724     # Triple quotes
    725     if text.endswith('"""') or text.endswith("'''"):
    726         start += 2
    727         end -= 2
    728     return text[:start] + 'x' * (end - start) + text[end:]
    729 
    730 
def message(text):
    """Print a message to standard output."""
    # Earlier revisions wrote to sys.stderr (optionally prefixed with the
    # program name); the message now goes to standard output.
    print(text)
    736 
    737 
    738 ##############################################################################
    739 # Framework to run all checks
    740 ##############################################################################
    741 
    742 
    743 def find_checks(argument_name):
    744     """
    745     Find all globally visible functions where the first argument name
    746     starts with argument_name.
    747     """
    748     checks = []
    749     for name, function in globals().items():
    750         if not inspect.isfunction(function):
    751             continue
    752         args = inspect.getargspec(function)[0]
    753         if args and args[0].startswith(argument_name):
    754             codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
    755             for code in codes or ['']:
    756                 if not code or not ignore_code(code):
    757                     checks.append((name, function, args))
    758                     break
    759     checks.sort()
    760     return checks
    761 
    762 
    763 class Checker(object):
    764     """
    765     Load a Python source file, tokenize it, check coding style.
    766     """
    767 
    def __init__(self, filename):
        """
        Load the lines of filename and add them to the line counter.

        With a false filename the checker is named 'stdin' and starts
        with an empty list of lines.
        """
        if filename:
            self.filename = filename
            # Each file object is closed explicitly instead of being left
            # for the garbage collector.
            try:
                source = open(filename)
                try:
                    self.lines = source.readlines()
                finally:
                    source.close()
            except UnicodeDecodeError:
                # Errors may occur with non-UTF8 files in Python 3000
                source = open(filename, errors='replace')
                try:
                    self.lines = source.readlines()
                finally:
                    source.close()
        else:
            self.filename = 'stdin'
            self.lines = []
        options.counters['physical lines'] = \
            options.counters.get('physical lines', 0) + len(self.lines)
    781 
    782     def readline(self):
    783         """
    784         Get the next line from the input buffer.
    785         """
    786         self.line_number += 1
    787         if self.line_number > len(self.lines):
    788             return ''
    789         return self.lines[self.line_number - 1]
    790 
    791     def readline_check_physical(self):
    792         """
    793         Check and return the next physical line. This method can be
    794         used to feed tokenize.generate_tokens.
    795         """
    796         line = self.readline()
    797         if line:
    798             self.check_physical(line)
    799         return line
    800 
    801     def run_check(self, check, argument_names):
    802         """
    803         Run a check plugin.
    804         """
    805         arguments = []
    806         for name in argument_names:
    807             arguments.append(getattr(self, name))
    808         return check(*arguments)
    809 
    810     def check_physical(self, line):
    811         """
    812         Run all physical checks on a raw input line.
    813         """
    814         self.physical_line = line
    815         if self.indent_char is None and len(line) and line[0] in ' \t':
    816             self.indent_char = line[0]
    817         for name, check, argument_names in options.physical_checks:
    818             result = self.run_check(check, argument_names)
    819             if result is not None:
    820                 offset, text = result
    821                 self.report_error(self.line_number, offset, text, check)
    822 
    823     def build_tokens_line(self):
    824         """
    825         Build a logical line from tokens.
    826         """
    827         self.mapping = []
    828         logical = []
    829         length = 0
    830         previous = None
    831         for token in self.tokens:
    832             token_type, text = token[0:2]
    833             if token_type in (tokenize.COMMENT, tokenize.NL,
    834                               tokenize.INDENT, tokenize.DEDENT,
    835                               tokenize.NEWLINE):
    836                 continue
    837             if token_type == tokenize.STRING:
    838                 text = mute_string(text)
    839             if previous:
    840                 end_line, end = previous[3]
    841                 start_line, start = token[2]
    842                 if end_line != start_line:  # different row
    843                     if self.lines[end_line - 1][end - 1] not in '{[(':
    844                         logical.append(' ')
    845                         length += 1
    846                 elif end != start:  # different column
    847                     fill = self.lines[end_line - 1][end:start]
    848                     logical.append(fill)
    849                     length += len(fill)
    850             self.mapping.append((length, token))
    851             logical.append(text)
    852             length += len(text)
    853             previous = token
    854         self.logical_line = ''.join(logical)
    855         assert self.logical_line.lstrip() == self.logical_line
    856         assert self.logical_line.rstrip() == self.logical_line
    857 
    858     def check_logical(self):
    859         """
    860         Build a line from tokens and run all logical checks on it.
    861         """
    862         options.counters['logical lines'] = \
    863             options.counters.get('logical lines', 0) + 1
    864         self.build_tokens_line()
    865         first_line = self.lines[self.mapping[0][1][2][0] - 1]
    866         indent = first_line[:self.mapping[0][1][2][1]]
    867         self.previous_indent_level = self.indent_level
    868         self.indent_level = expand_indent(indent)
    869         if options.verbose >= 2:
    870             print(self.logical_line[:80].rstrip())
    871         for name, check, argument_names in options.logical_checks:
    872             if options.verbose >= 3:
    873                 print('   ', name)
    874             result = self.run_check(check, argument_names)
    875             if result is not None:
    876                 offset, text = result
    877                 if isinstance(offset, tuple):
    878                     original_number, original_offset = offset
    879                 else:
    880                     for token_offset, token in self.mapping:
    881                         if offset >= token_offset:
    882                             original_number = token[2][0]
    883                             original_offset = (token[2][1]
    884                                                + offset - token_offset)
    885                 self.report_error(original_number, original_offset,
    886                                   text, check)
    887         self.previous_logical = self.logical_line
    888 
    889     def check_all(self):
    890         """
    891         Run all checks on the input file.
    892         """
    893         self.file_errors = 0
    894         self.line_number = 0
    895         self.indent_char = None
    896         self.indent_level = 0
    897         self.previous_logical = ''
    898         self.blank_lines = 0
    899         self.blank_lines_before_comment = 0
    900         self.tokens = []
    901         parens = 0
    902         for token in tokenize.generate_tokens(self.readline_check_physical):
    903             # print(tokenize.tok_name[token[0]], repr(token))
    904             self.tokens.append(token)
    905             token_type, text = token[0:2]
    906             if token_type == tokenize.OP and text in '([{':
    907                 parens += 1
    908             if token_type == tokenize.OP and text in '}])':
    909                 parens -= 1
    910             if token_type == tokenize.NEWLINE and not parens:
    911                 self.check_logical()
    912                 self.blank_lines = 0
    913                 self.blank_lines_before_comment = 0
    914                 self.tokens = []
    915             if token_type == tokenize.NL and not parens:
    916                 if len(self.tokens) <= 1:
    917                     # The physical line contains only this token.
    918                     self.blank_lines += 1
    919                 self.tokens = []
    920             if token_type == tokenize.COMMENT:
    921                 source_line = token[4]
    922                 token_start = token[2][1]
    923                 if source_line[:token_start].strip() == '':
    924                     self.blank_lines_before_comment = max(self.blank_lines,
    925                         self.blank_lines_before_comment)
    926                     self.blank_lines = 0
    927                 if text.endswith('\n') and not parens:
    928                     # The comment also ends a physical line.  This works around
    929                     # Python < 2.6 behaviour, which does not generate NL after
    930                     # a comment which is on a line by itself.
    931                     self.tokens = []
    932         return self.file_errors
    933 
    934     def report_error(self, line_number, offset, text, check):
    935         """
    936         Report an error, according to options.
    937         """
    938         if options.quiet == 1 and not self.file_errors:
    939             message(self.filename)
    940         self.file_errors += 1
    941         code = text[:4]
    942         options.counters[code] = options.counters.get(code, 0) + 1
    943         options.messages[code] = text[5:]
    944         if options.quiet:
    945             return
    946         if options.testsuite:
    947             basename = os.path.basename(self.filename)
    948             if basename[:4] != code:
    949                 return  # Don't care about other errors or warnings
    950             if 'not' not in basename:
    951                 return  # Don't print the expected error message
    952         if ignore_code(code):
    953             return
    954         if options.counters[code] == 1 or options.repeat:
    955             message("%s:%s:%d: %s" %
    956                     (self.filename, line_number, offset + 1, text))
    957             if options.show_source:
    958                 line = self.lines[line_number - 1]
    959                 message(line.rstrip())
    960                 message(' ' * offset + '^')
    961             if options.show_pep8:
    962                 message(check.__doc__.lstrip('\n').rstrip())
    963 
    964 
    965 def input_file(filename):
    966     """
    967     Run all checks on a Python source file.
    968     """
    969     if excluded(filename):
    970         return {}
    971     if options.verbose:
    972         message('checking ' + filename)
    973     files_counter_before = options.counters.get('files', 0)
    974     if options.testsuite:  # Keep showing errors for multiple tests
    975         options.counters = {}
    976     options.counters['files'] = files_counter_before + 1
    977     errors = Checker(filename).check_all()
    978     if options.testsuite:  # Check if the expected error was found
    979         basename = os.path.basename(filename)
    980         code = basename[:4]
    981         count = options.counters.get(code, 0)
    982         if count == 0 and 'not' not in basename:
    983             message("%s: error %s not found" % (filename, code))
    984 
    985 
    986 def input_dir(dirname):
    987     """
    988     Check all Python source files in this directory and all subdirectories.
    989     """
    990     dirname = dirname.rstrip('/')
    991     if excluded(dirname):
    992         return
    993     for root, dirs, files in os.walk(dirname):
    994         if options.verbose:
    995             message('directory ' + root)
    996         options.counters['directories'] = \
    997             options.counters.get('directories', 0) + 1
    998         dirs.sort()
    999         for subdir in dirs:
   1000             if excluded(subdir):
   1001                 dirs.remove(subdir)
   1002         files.sort()
   1003         for filename in files:
   1004             if filename_match(filename):
   1005                 input_file(os.path.join(root, filename))
   1006 
   1007 
   1008 def excluded(filename):
   1009     """
   1010     Check if options.exclude contains a pattern that matches filename.
   1011     """
   1012     basename = os.path.basename(filename)
   1013     for pattern in options.exclude:
   1014         if fnmatch(basename, pattern):
   1015             # print basename, 'excluded because it matches', pattern
   1016             return True
   1017 
   1018 
   1019 def filename_match(filename):
   1020     """
   1021     Check if options.filename contains a pattern that matches filename.
   1022     If options.filename is unspecified, this always returns True.
   1023     """
   1024     if not options.filename:
   1025         return True
   1026     for pattern in options.filename:
   1027         if fnmatch(filename, pattern):
   1028             return True
   1029 
   1030 
   1031 def ignore_code(code):
   1032     """
   1033     Check if options.ignore contains a prefix of the error code.
   1034     If options.select contains a prefix of the error code, do not ignore it.
   1035     """
   1036     for select in options.select:
   1037         if code.startswith(select):
   1038             return False
   1039     for ignore in options.ignore:
   1040         if code.startswith(ignore):
   1041             return True
   1042 
   1043 
   1044 def get_error_statistics():
   1045     """Get error statistics."""
   1046     return get_statistics("E")
   1047 
   1048 
   1049 def get_warning_statistics():
   1050     """Get warning statistics."""
   1051     return get_statistics("W")
   1052 
   1053 
   1054 def get_statistics(prefix=''):
   1055     """
   1056     Get statistics for message codes that start with the prefix.
   1057 
   1058     prefix='' matches all errors and warnings
   1059     prefix='E' matches all errors
   1060     prefix='W' matches all warnings
   1061     prefix='E4' matches all errors that have to do with imports
   1062     """
   1063     stats = []
   1064     keys = list(options.messages.keys())
   1065     keys.sort()
   1066     for key in keys:
   1067         if key.startswith(prefix):
   1068             stats.append('%-7s %s %s' %
   1069                          (options.counters[key], key, options.messages[key]))
   1070     return stats
   1071 
   1072 
   1073 def get_count(prefix=''):
   1074     """Return the total count of errors and warnings."""
   1075     keys = list(options.messages.keys())
   1076     count = 0
   1077     for key in keys:
   1078         if key.startswith(prefix):
   1079             count += options.counters[key]
   1080     return count
   1081 
   1082 
   1083 def print_statistics(prefix=''):
   1084     """Print overall statistics (number of errors and warnings)."""
   1085     for line in get_statistics(prefix):
   1086         print(line)
   1087 
   1088 
   1089 def print_benchmark(elapsed):
   1090     """
   1091     Print benchmark numbers.
   1092     """
   1093     print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
   1094     keys = ['directories', 'files',
   1095             'logical lines', 'physical lines']
   1096     for key in keys:
   1097         if key in options.counters:
   1098             print('%-7d %s per second (%d total)' % (
   1099                 options.counters[key] / elapsed, key,
   1100                 options.counters[key]))
   1101 
   1102 
   1103 def selftest():
   1104     """
   1105     Test all check functions with test cases in docstrings.
   1106     """
   1107     count_passed = 0
   1108     count_failed = 0
   1109     checks = options.physical_checks + options.logical_checks
   1110     for name, check, argument_names in checks:
   1111         for line in check.__doc__.splitlines():
   1112             line = line.lstrip()
   1113             match = SELFTEST_REGEX.match(line)
   1114             if match is None:
   1115                 continue
   1116             code, source = match.groups()
   1117             checker = Checker(None)
   1118             for part in source.split(r'\n'):
   1119                 part = part.replace(r'\t', '\t')
   1120                 part = part.replace(r'\s', ' ')
   1121                 checker.lines.append(part + '\n')
   1122             options.quiet = 2
   1123             options.counters = {}
   1124             checker.check_all()
   1125             error = None
   1126             if code == 'Okay':
   1127                 if len(options.counters) > 1:
   1128                     codes = [key for key in options.counters.keys()
   1129                              if key != 'logical lines']
   1130                     error = "incorrectly found %s" % ', '.join(codes)
   1131             elif options.counters.get(code, 0) == 0:
   1132                 error = "failed to find %s" % code
   1133             if not error:
   1134                 count_passed += 1
   1135             else:
   1136                 count_failed += 1
   1137                 if len(checker.lines) == 1:
   1138                     print("pep8.py: %s: %s" %
   1139                           (error, checker.lines[0].rstrip()))
   1140                 else:
   1141                     print("pep8.py: %s:" % error)
   1142                     for line in checker.lines:
   1143                         print(line.rstrip())
   1144     if options.verbose:
   1145         print("%d passed and %d failed." % (count_passed, count_failed))
   1146         if count_failed:
   1147             print("Test failed.")
   1148         else:
   1149             print("Test passed.")
   1150 
   1151 
   1152 def process_options(arglist=None):
   1153     """
   1154     Process options passed either via arglist or via command line args.
   1155     """
   1156     global options, args
   1157     parser = OptionParser(version=__version__,
   1158                           usage="%prog [options] input ...")
   1159     parser.add_option('-v', '--verbose', default=0, action='count',
   1160                       help="print status messages, or debug with -vv")
   1161     parser.add_option('-q', '--quiet', default=0, action='count',
   1162                       help="report only file names, or nothing with -qq")
   1163     parser.add_option('-r', '--repeat', action='store_true',
   1164                       help="show all occurrences of the same error")
   1165     parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
   1166                       help="exclude files or directories which match these "
   1167                         "comma separated patterns (default: %s)" %
   1168                         DEFAULT_EXCLUDE)
   1169     parser.add_option('--filename', metavar='patterns', default='*.py',
   1170                       help="when parsing directories, only check filenames "
   1171                         "matching these comma separated patterns (default: "
   1172                         "*.py)")
   1173     parser.add_option('--select', metavar='errors', default='',
   1174                       help="select errors and warnings (e.g. E,W6)")
   1175     parser.add_option('--ignore', metavar='errors', default='',
   1176                       help="skip errors and warnings (e.g. E4,W)")
   1177     parser.add_option('--show-source', action='store_true',
   1178                       help="show source code for each error")
   1179     parser.add_option('--show-pep8', action='store_true',
   1180                       help="show text of PEP 8 for each error")
   1181     parser.add_option('--statistics', action='store_true',
   1182                       help="count errors and warnings")
   1183     parser.add_option('--count', action='store_true',
   1184                       help="print total number of errors and warnings "
   1185                         "to standard error and set exit code to 1 if "
   1186                         "total is not null")
   1187     parser.add_option('--benchmark', action='store_true',
   1188                       help="measure processing speed")
   1189     parser.add_option('--testsuite', metavar='dir',
   1190                       help="run regression tests from dir")
   1191     parser.add_option('--doctest', action='store_true',
   1192                       help="run doctest on myself")
   1193     options, args = parser.parse_args(arglist)
   1194     if options.testsuite:
   1195         args.append(options.testsuite)
   1196     if len(args) == 0 and not options.doctest:
   1197         parser.error('input not specified')
   1198     options.prog = os.path.basename(sys.argv[0])
   1199     options.exclude = options.exclude.split(',')
   1200     for index in range(len(options.exclude)):
   1201         options.exclude[index] = options.exclude[index].rstrip('/')
   1202     if options.filename:
   1203         options.filename = options.filename.split(',')
   1204     if options.select:
   1205         options.select = options.select.split(',')
   1206     else:
   1207         options.select = []
   1208     if options.ignore:
   1209         options.ignore = options.ignore.split(',')
   1210     elif options.select:
   1211         # Ignore all checks which are not explicitly selected
   1212         options.ignore = ['']
   1213     elif options.testsuite or options.doctest:
   1214         # For doctest and testsuite, all checks are required
   1215         options.ignore = []
   1216     else:
   1217         # The default choice: ignore controversial checks
   1218         options.ignore = DEFAULT_IGNORE
   1219     options.physical_checks = find_checks('physical_line')
   1220     options.logical_checks = find_checks('logical_line')
   1221     options.counters = {}
   1222     options.messages = {}
   1223     return options, args
   1224 
   1225 
   1226 def _main():
   1227     """
   1228     Parse options and run checks on Python source.
   1229     """
   1230     options, args = process_options()
   1231     if options.doctest:
   1232         import doctest
   1233         doctest.testmod(verbose=options.verbose)
   1234         selftest()
   1235     start_time = time.time()
   1236     for path in args:
   1237         if os.path.isdir(path):
   1238             input_dir(path)
   1239         else:
   1240             input_file(path)
   1241     elapsed = time.time() - start_time
   1242     if options.statistics:
   1243         print_statistics()
   1244     if options.benchmark:
   1245         print_benchmark(elapsed)
   1246     if options.count:
   1247         count = get_count()
   1248         if count:
   1249             sys.stderr.write(str(count) + '\n')
   1250             sys.exit(1)
   1251 
   1252 
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    _main()
   1255