Home | History | Annotate | Download | only in Lib
      1 #! /usr/bin/env python3
      2 
      3 """The Tab Nanny despises ambiguous indentation.  She knows no mercy.
      4 
      5 tabnanny -- Detection of ambiguous indentation
      6 
      7 For the time being this module is intended to be called as a script.
      8 However it is possible to import it into an IDE and use the function
      9 check() described below.
     10 
     11 Warning: The API provided by this module is likely to change in future
     12 releases; such changes may not be backward compatible.
     13 """
     14 
     15 # Released to the public domain, by Tim Peters, 15 April 1998.
     16 
     17 # XXX Note: this is now a standard library module.
     18 # XXX The API needs to undergo changes however; the current code is too
     19 # XXX script-like.  This will be addressed later.
     20 
     21 __version__ = "6"
     22 
     23 import os
     24 import sys
     25 import getopt
     26 import tokenize
     27 if not hasattr(tokenize, 'NL'):
     28     raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
     29 
     30 __all__ = ["check", "NannyNag", "process_tokens"]
     31 
     32 verbose = 0
     33 filename_only = 0
     34 
     35 def errprint(*args):
     36     sep = ""
     37     for arg in args:
     38         sys.stderr.write(sep + str(arg))
     39         sep = " "
     40     sys.stderr.write("\n")
     41 
     42 def main():
     43     global verbose, filename_only
     44     try:
     45         opts, args = getopt.getopt(sys.argv[1:], "qv")
     46     except getopt.error as msg:
     47         errprint(msg)
     48         return
     49     for o, a in opts:
     50         if o == '-q':
     51             filename_only = filename_only + 1
     52         if o == '-v':
     53             verbose = verbose + 1
     54     if not args:
     55         errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
     56         return
     57     for arg in args:
     58         check(arg)
     59 
     60 class NannyNag(Exception):
     61     """
     62     Raised by tokeneater() if detecting an ambiguous indent.
     63     Captured and handled in check().
     64     """
     65     def __init__(self, lineno, msg, line):
     66         self.lineno, self.msg, self.line = lineno, msg, line
     67     def get_lineno(self):
     68         return self.lineno
     69     def get_msg(self):
     70         return self.msg
     71     def get_line(self):
     72         return self.line
     73 
     74 def check(file):
     75     """check(file_or_dir)
     76 
     77     If file_or_dir is a directory and not a symbolic link, then recursively
     78     descend the directory tree named by file_or_dir, checking all .py files
     79     along the way. If file_or_dir is an ordinary Python source file, it is
     80     checked for whitespace related problems. The diagnostic messages are
     81     written to standard output using the print statement.
     82     """
     83 
     84     if os.path.isdir(file) and not os.path.islink(file):
     85         if verbose:
     86             print("%r: listing directory" % (file,))
     87         names = os.listdir(file)
     88         for name in names:
     89             fullname = os.path.join(file, name)
     90             if (os.path.isdir(fullname) and
     91                 not os.path.islink(fullname) or
     92                 os.path.normcase(name[-3:]) == ".py"):
     93                 check(fullname)
     94         return
     95 
     96     try:
     97         f = tokenize.open(file)
     98     except OSError as msg:
     99         errprint("%r: I/O Error: %s" % (file, msg))
    100         return
    101 
    102     if verbose > 1:
    103         print("checking %r ..." % file)
    104 
    105     try:
    106         process_tokens(tokenize.generate_tokens(f.readline))
    107 
    108     except tokenize.TokenError as msg:
    109         errprint("%r: Token Error: %s" % (file, msg))
    110         return
    111 
    112     except IndentationError as msg:
    113         errprint("%r: Indentation Error: %s" % (file, msg))
    114         return
    115 
    116     except NannyNag as nag:
    117         badline = nag.get_lineno()
    118         line = nag.get_line()
    119         if verbose:
    120             print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
    121             print("offending line: %r" % (line,))
    122             print(nag.get_msg())
    123         else:
    124             if ' ' in file: file = '"' + file + '"'
    125             if filename_only: print(file)
    126             else: print(file, badline, repr(line))
    127         return
    128 
    129     finally:
    130         f.close()
    131 
    132     if verbose:
    133         print("%r: Clean bill of health." % (file,))
    134 
    135 class Whitespace:
    136     # the characters used for space and tab
    137     S, T = ' \t'
    138 
    139     # members:
    140     #   raw
    141     #       the original string
    142     #   n
    143     #       the number of leading whitespace characters in raw
    144     #   nt
    145     #       the number of tabs in raw[:n]
    146     #   norm
    147     #       the normal form as a pair (count, trailing), where:
    148     #       count
    149     #           a tuple such that raw[:n] contains count[i]
    150     #           instances of S * i + T
    151     #       trailing
    152     #           the number of trailing spaces in raw[:n]
    153     #       It's A Theorem that m.indent_level(t) ==
    154     #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    155     #   is_simple
    156     #       true iff raw[:n] is of the form (T*)(S*)
    157 
    158     def __init__(self, ws):
    159         self.raw  = ws
    160         S, T = Whitespace.S, Whitespace.T
    161         count = []
    162         b = n = nt = 0
    163         for ch in self.raw:
    164             if ch == S:
    165                 n = n + 1
    166                 b = b + 1
    167             elif ch == T:
    168                 n = n + 1
    169                 nt = nt + 1
    170                 if b >= len(count):
    171                     count = count + [0] * (b - len(count) + 1)
    172                 count[b] = count[b] + 1
    173                 b = 0
    174             else:
    175                 break
    176         self.n    = n
    177         self.nt   = nt
    178         self.norm = tuple(count), b
    179         self.is_simple = len(count) <= 1
    180 
    181     # return length of longest contiguous run of spaces (whether or not
    182     # preceding a tab)
    183     def longest_run_of_spaces(self):
    184         count, trailing = self.norm
    185         return max(len(count)-1, trailing)
    186 
    187     def indent_level(self, tabsize):
    188         # count, il = self.norm
    189         # for i in range(len(count)):
    190         #    if count[i]:
    191         #        il = il + (i//tabsize + 1)*tabsize * count[i]
    192         # return il
    193 
    194         # quicker:
    195         # il = trailing + sum (i//ts + 1)*ts*count[i] =
    196         # trailing + ts * sum (i//ts + 1)*count[i] =
    197         # trailing + ts * sum i//ts*count[i] + count[i] =
    198         # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
    199         # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
    200         # and note that i//ts*count[i] is 0 when i < ts
    201 
    202         count, trailing = self.norm
    203         il = 0
    204         for i in range(tabsize, len(count)):
    205             il = il + i//tabsize * count[i]
    206         return trailing + tabsize * (il + self.nt)
    207 
    208     # return true iff self.indent_level(t) == other.indent_level(t)
    209     # for all t >= 1
    210     def equal(self, other):
    211         return self.norm == other.norm
    212 
    213     # return a list of tuples (ts, i1, i2) such that
    214     # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    215     # Intended to be used after not self.equal(other) is known, in which
    216     # case it will return at least one witnessing tab size.
    217     def not_equal_witness(self, other):
    218         n = max(self.longest_run_of_spaces(),
    219                 other.longest_run_of_spaces()) + 1
    220         a = []
    221         for ts in range(1, n+1):
    222             if self.indent_level(ts) != other.indent_level(ts):
    223                 a.append( (ts,
    224                            self.indent_level(ts),
    225                            other.indent_level(ts)) )
    226         return a
    227 
    228     # Return True iff self.indent_level(t) < other.indent_level(t)
    229     # for all t >= 1.
    230     # The algorithm is due to Vincent Broman.
    231     # Easy to prove it's correct.
    232     # XXXpost that.
    233     # Trivial to prove n is sharp (consider T vs ST).
    234     # Unknown whether there's a faster general way.  I suspected so at
    235     # first, but no longer.
    236     # For the special (but common!) case where M and N are both of the
    237     # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    238     # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    239     # XXXwrite that up.
    240     # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    241     def less(self, other):
    242         if self.n >= other.n:
    243             return False
    244         if self.is_simple and other.is_simple:
    245             return self.nt <= other.nt
    246         n = max(self.longest_run_of_spaces(),
    247                 other.longest_run_of_spaces()) + 1
    248         # the self.n >= other.n test already did it for ts=1
    249         for ts in range(2, n+1):
    250             if self.indent_level(ts) >= other.indent_level(ts):
    251                 return False
    252         return True
    253 
    254     # return a list of tuples (ts, i1, i2) such that
    255     # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    256     # Intended to be used after not self.less(other) is known, in which
    257     # case it will return at least one witnessing tab size.
    258     def not_less_witness(self, other):
    259         n = max(self.longest_run_of_spaces(),
    260                 other.longest_run_of_spaces()) + 1
    261         a = []
    262         for ts in range(1, n+1):
    263             if self.indent_level(ts) >= other.indent_level(ts):
    264                 a.append( (ts,
    265                            self.indent_level(ts),
    266                            other.indent_level(ts)) )
    267         return a
    268 
    269 def format_witnesses(w):
    270     firsts = (str(tup[0]) for tup in w)
    271     prefix = "at tab size"
    272     if len(w) > 1:
    273         prefix = prefix + "s"
    274     return prefix + " " + ', '.join(firsts)
    275 
    276 def process_tokens(tokens):
    277     INDENT = tokenize.INDENT
    278     DEDENT = tokenize.DEDENT
    279     NEWLINE = tokenize.NEWLINE
    280     JUNK = tokenize.COMMENT, tokenize.NL
    281     indents = [Whitespace("")]
    282     check_equal = 0
    283 
    284     for (type, token, start, end, line) in tokens:
    285         if type == NEWLINE:
    286             # a program statement, or ENDMARKER, will eventually follow,
    287             # after some (possibly empty) run of tokens of the form
    288             #     (NL | COMMENT)* (INDENT | DEDENT+)?
    289             # If an INDENT appears, setting check_equal is wrong, and will
    290             # be undone when we see the INDENT.
    291             check_equal = 1
    292 
    293         elif type == INDENT:
    294             check_equal = 0
    295             thisguy = Whitespace(token)
    296             if not indents[-1].less(thisguy):
    297                 witness = indents[-1].not_less_witness(thisguy)
    298                 msg = "indent not greater e.g. " + format_witnesses(witness)
    299                 raise NannyNag(start[0], msg, line)
    300             indents.append(thisguy)
    301 
    302         elif type == DEDENT:
    303             # there's nothing we need to check here!  what's important is
    304             # that when the run of DEDENTs ends, the indentation of the
    305             # program statement (or ENDMARKER) that triggered the run is
    306             # equal to what's left at the top of the indents stack
    307 
    308             # Ouch!  This assert triggers if the last line of the source
    309             # is indented *and* lacks a newline -- then DEDENTs pop out
    310             # of thin air.
    311             # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
    312             check_equal = 1
    313 
    314             del indents[-1]
    315 
    316         elif check_equal and type not in JUNK:
    317             # this is the first "real token" following a NEWLINE, so it
    318             # must be the first token of the next program statement, or an
    319             # ENDMARKER; the "line" argument exposes the leading whitespace
    320             # for this statement; in the case of ENDMARKER, line is an empty
    321             # string, so will properly match the empty string with which the
    322             # "indents" stack was seeded
    323             check_equal = 0
    324             thisguy = Whitespace(line)
    325             if not indents[-1].equal(thisguy):
    326                 witness = indents[-1].not_equal_witness(thisguy)
    327                 msg = "indent not equal e.g. " + format_witnesses(witness)
    328                 raise NannyNag(start[0], msg, line)
    329 
    330 
    331 if __name__ == '__main__':
    332     main()
    333