Home | History | Annotate | Download | only in Lib
      1 #! /usr/bin/env python
      2 
      3 """The Tab Nanny despises ambiguous indentation.  She knows no mercy.
      4 
      5 tabnanny -- Detection of ambiguous indentation
      6 
      7 For the time being this module is intended to be called as a script.
      8 However it is possible to import it into an IDE and use the function
      9 check() described below.
     10 
     11 Warning: The API provided by this module is likely to change in future
     12 releases; such changes may not be backward compatible.
     13 """
     14 
     15 # Released to the public domain, by Tim Peters, 15 April 1998.
     16 
     17 # XXX Note: this is now a standard library module.
     18 # XXX The API needs to undergo changes however; the current code is too
     19 # XXX script-like.  This will be addressed later.
     20 
     21 __version__ = "6"
     22 
     23 import os
     24 import sys
     25 import getopt
     26 import tokenize
     27 if not hasattr(tokenize, 'NL'):
     28     raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
     29 
     30 __all__ = ["check", "NannyNag", "process_tokens"]
     31 
     32 verbose = 0
     33 filename_only = 0
     34 
     35 def errprint(*args):
     36     sep = ""
     37     for arg in args:
     38         sys.stderr.write(sep + str(arg))
     39         sep = " "
     40     sys.stderr.write("\n")
     41 
     42 def main():
     43     global verbose, filename_only
     44     try:
     45         opts, args = getopt.getopt(sys.argv[1:], "qv")
     46     except getopt.error, msg:
     47         errprint(msg)
     48         return
     49     for o, a in opts:
     50         if o == '-q':
     51             filename_only = filename_only + 1
     52         if o == '-v':
     53             verbose = verbose + 1
     54     if not args:
     55         errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
     56         return
     57     for arg in args:
     58         check(arg)
     59 
     60 class NannyNag(Exception):
     61     """
     62     Raised by tokeneater() if detecting an ambiguous indent.
     63     Captured and handled in check().
     64     """
     65     def __init__(self, lineno, msg, line):
     66         self.lineno, self.msg, self.line = lineno, msg, line
     67     def get_lineno(self):
     68         return self.lineno
     69     def get_msg(self):
     70         return self.msg
     71     def get_line(self):
     72         return self.line
     73 
     74 def check(file):
     75     """check(file_or_dir)
     76 
     77     If file_or_dir is a directory and not a symbolic link, then recursively
     78     descend the directory tree named by file_or_dir, checking all .py files
     79     along the way. If file_or_dir is an ordinary Python source file, it is
     80     checked for whitespace related problems. The diagnostic messages are
     81     written to standard output using the print statement.
     82     """
     83 
     84     if os.path.isdir(file) and not os.path.islink(file):
     85         if verbose:
     86             print "%r: listing directory" % (file,)
     87         names = os.listdir(file)
     88         for name in names:
     89             fullname = os.path.join(file, name)
     90             if (os.path.isdir(fullname) and
     91                 not os.path.islink(fullname) or
     92                 os.path.normcase(name[-3:]) == ".py"):
     93                 check(fullname)
     94         return
     95 
     96     try:
     97         f = open(file)
     98     except IOError, msg:
     99         errprint("%r: I/O Error: %s" % (file, msg))
    100         return
    101 
    102     if verbose > 1:
    103         print "checking %r ..." % file
    104 
    105     try:
    106         process_tokens(tokenize.generate_tokens(f.readline))
    107 
    108     except tokenize.TokenError, msg:
    109         errprint("%r: Token Error: %s" % (file, msg))
    110         return
    111 
    112     except IndentationError, msg:
    113         errprint("%r: Indentation Error: %s" % (file, msg))
    114         return
    115 
    116     except NannyNag, nag:
    117         badline = nag.get_lineno()
    118         line = nag.get_line()
    119         if verbose:
    120             print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
    121             print "offending line: %r" % (line,)
    122             print nag.get_msg()
    123         else:
    124             if ' ' in file: file = '"' + file + '"'
    125             if filename_only: print file
    126             else: print file, badline, repr(line)
    127         return
    128 
    129     if verbose:
    130         print "%r: Clean bill of health." % (file,)
    131 
    132 class Whitespace:
    133     # the characters used for space and tab
    134     S, T = ' \t'
    135 
    136     # members:
    137     #   raw
    138     #       the original string
    139     #   n
    140     #       the number of leading whitespace characters in raw
    141     #   nt
    142     #       the number of tabs in raw[:n]
    143     #   norm
    144     #       the normal form as a pair (count, trailing), where:
    145     #       count
    146     #           a tuple such that raw[:n] contains count[i]
    147     #           instances of S * i + T
    148     #       trailing
    149     #           the number of trailing spaces in raw[:n]
    150     #       It's A Theorem that m.indent_level(t) ==
    151     #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    152     #   is_simple
    153     #       true iff raw[:n] is of the form (T*)(S*)
    154 
    155     def __init__(self, ws):
    156         self.raw  = ws
    157         S, T = Whitespace.S, Whitespace.T
    158         count = []
    159         b = n = nt = 0
    160         for ch in self.raw:
    161             if ch == S:
    162                 n = n + 1
    163                 b = b + 1
    164             elif ch == T:
    165                 n = n + 1
    166                 nt = nt + 1
    167                 if b >= len(count):
    168                     count = count + [0] * (b - len(count) + 1)
    169                 count[b] = count[b] + 1
    170                 b = 0
    171             else:
    172                 break
    173         self.n    = n
    174         self.nt   = nt
    175         self.norm = tuple(count), b
    176         self.is_simple = len(count) <= 1
    177 
    178     # return length of longest contiguous run of spaces (whether or not
    179     # preceding a tab)
    180     def longest_run_of_spaces(self):
    181         count, trailing = self.norm
    182         return max(len(count)-1, trailing)
    183 
    184     def indent_level(self, tabsize):
    185         # count, il = self.norm
    186         # for i in range(len(count)):
    187         #    if count[i]:
    188         #        il = il + (i/tabsize + 1)*tabsize * count[i]
    189         # return il
    190 
    191         # quicker:
    192         # il = trailing + sum (i/ts + 1)*ts*count[i] =
    193         # trailing + ts * sum (i/ts + 1)*count[i] =
    194         # trailing + ts * sum i/ts*count[i] + count[i] =
    195         # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
    196         # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
    197         # and note that i/ts*count[i] is 0 when i < ts
    198 
    199         count, trailing = self.norm
    200         il = 0
    201         for i in range(tabsize, len(count)):
    202             il = il + i/tabsize * count[i]
    203         return trailing + tabsize * (il + self.nt)
    204 
    205     # return true iff self.indent_level(t) == other.indent_level(t)
    206     # for all t >= 1
    207     def equal(self, other):
    208         return self.norm == other.norm
    209 
    210     # return a list of tuples (ts, i1, i2) such that
    211     # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    212     # Intended to be used after not self.equal(other) is known, in which
    213     # case it will return at least one witnessing tab size.
    214     def not_equal_witness(self, other):
    215         n = max(self.longest_run_of_spaces(),
    216                 other.longest_run_of_spaces()) + 1
    217         a = []
    218         for ts in range(1, n+1):
    219             if self.indent_level(ts) != other.indent_level(ts):
    220                 a.append( (ts,
    221                            self.indent_level(ts),
    222                            other.indent_level(ts)) )
    223         return a
    224 
    225     # Return True iff self.indent_level(t) < other.indent_level(t)
    226     # for all t >= 1.
    227     # The algorithm is due to Vincent Broman.
    228     # Easy to prove it's correct.
    229     # XXXpost that.
    230     # Trivial to prove n is sharp (consider T vs ST).
    231     # Unknown whether there's a faster general way.  I suspected so at
    232     # first, but no longer.
    233     # For the special (but common!) case where M and N are both of the
    234     # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    235     # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    236     # XXXwrite that up.
    237     # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    238     def less(self, other):
    239         if self.n >= other.n:
    240             return False
    241         if self.is_simple and other.is_simple:
    242             return self.nt <= other.nt
    243         n = max(self.longest_run_of_spaces(),
    244                 other.longest_run_of_spaces()) + 1
    245         # the self.n >= other.n test already did it for ts=1
    246         for ts in range(2, n+1):
    247             if self.indent_level(ts) >= other.indent_level(ts):
    248                 return False
    249         return True
    250 
    251     # return a list of tuples (ts, i1, i2) such that
    252     # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    253     # Intended to be used after not self.less(other) is known, in which
    254     # case it will return at least one witnessing tab size.
    255     def not_less_witness(self, other):
    256         n = max(self.longest_run_of_spaces(),
    257                 other.longest_run_of_spaces()) + 1
    258         a = []
    259         for ts in range(1, n+1):
    260             if self.indent_level(ts) >= other.indent_level(ts):
    261                 a.append( (ts,
    262                            self.indent_level(ts),
    263                            other.indent_level(ts)) )
    264         return a
    265 
    266 def format_witnesses(w):
    267     firsts = map(lambda tup: str(tup[0]), w)
    268     prefix = "at tab size"
    269     if len(w) > 1:
    270         prefix = prefix + "s"
    271     return prefix + " " + ', '.join(firsts)
    272 
    273 def process_tokens(tokens):
    274     INDENT = tokenize.INDENT
    275     DEDENT = tokenize.DEDENT
    276     NEWLINE = tokenize.NEWLINE
    277     JUNK = tokenize.COMMENT, tokenize.NL
    278     indents = [Whitespace("")]
    279     check_equal = 0
    280 
    281     for (type, token, start, end, line) in tokens:
    282         if type == NEWLINE:
    283             # a program statement, or ENDMARKER, will eventually follow,
    284             # after some (possibly empty) run of tokens of the form
    285             #     (NL | COMMENT)* (INDENT | DEDENT+)?
    286             # If an INDENT appears, setting check_equal is wrong, and will
    287             # be undone when we see the INDENT.
    288             check_equal = 1
    289 
    290         elif type == INDENT:
    291             check_equal = 0
    292             thisguy = Whitespace(token)
    293             if not indents[-1].less(thisguy):
    294                 witness = indents[-1].not_less_witness(thisguy)
    295                 msg = "indent not greater e.g. " + format_witnesses(witness)
    296                 raise NannyNag(start[0], msg, line)
    297             indents.append(thisguy)
    298 
    299         elif type == DEDENT:
    300             # there's nothing we need to check here!  what's important is
    301             # that when the run of DEDENTs ends, the indentation of the
    302             # program statement (or ENDMARKER) that triggered the run is
    303             # equal to what's left at the top of the indents stack
    304 
    305             # Ouch!  This assert triggers if the last line of the source
    306             # is indented *and* lacks a newline -- then DEDENTs pop out
    307             # of thin air.
    308             # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
    309             check_equal = 1
    310 
    311             del indents[-1]
    312 
    313         elif check_equal and type not in JUNK:
    314             # this is the first "real token" following a NEWLINE, so it
    315             # must be the first token of the next program statement, or an
    316             # ENDMARKER; the "line" argument exposes the leading whitespace
    317             # for this statement; in the case of ENDMARKER, line is an empty
    318             # string, so will properly match the empty string with which the
    319             # "indents" stack was seeded
    320             check_equal = 0
    321             thisguy = Whitespace(line)
    322             if not indents[-1].equal(thisguy):
    323                 witness = indents[-1].not_equal_witness(thisguy)
    324                 msg = "indent not equal e.g. " + format_witnesses(witness)
    325                 raise NannyNag(start[0], msg, line)
    326 
    327 
    328 if __name__ == '__main__':
    329     main()
    330