Home | History | Annotate | Download | only in scripts
      1 #! /usr/bin/env python

      2 
      3 # Released to the public domain, by Tim Peters, 03 October 2000.

      4 
      5 """reindent [-d][-r][-v] [ path ... ]
      6 
      7 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
      8 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
      9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
     10 -v (--verbose)  Verbose.   Print informative msgs; else no output.
     11 -h (--help)     Help.      Print this usage information and exit.
     12 
     13 Change Python (.py) files to use 4-space indents and no hard tab characters.
     14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
     15 at the end of files.  Also ensure the last line ends with a newline.
     16 
     17 If no paths are given on the command line, reindent operates as a filter,
     18 reading a single source file from standard input and writing the transformed
     19 source to standard output.  In this case, the -d, -r and -v flags are
     20 ignored.
     21 
     22 You can pass one or more file and/or directory paths.  When a directory
     23 path, all .py files within the directory will be examined, and, if the -r
     24 option is given, likewise recursively for subdirectories.
     25 
     26 If output is not to standard output, reindent overwrites files in place,
     27 renaming the originals with a .bak extension.  If it finds nothing to
     28 change, the file is left alone.  If reindent does change a file, the changed
     29 file is a fixed-point for future runs (i.e., running reindent on the
     30 resulting .py file won't change it again).
     31 
     32 The hard part of reindenting is figuring out what to do with comment
     33 lines.  So long as the input files get a clean bill of health from
     34 tabnanny.py, reindent should do a good job.
     35 
     36 The backup file is a copy of the one that is being reindented. The ".bak"
     37 file is generated with shutil.copyfile(), but some corner cases regarding
     38 user/group and permissions could leave the backup file more readable than
     39 you'd prefer. You can always use the --nobackup option to prevent this.
     40 """
     41 
     42 __version__ = "1"
     43 
     44 import tokenize
     45 import os, shutil
     46 import sys
     47 
     48 verbose    = 0
     49 recurse    = 0
     50 dryrun     = 0
     51 makebackup = True
     52 
     53 def usage(msg=None):
     54     if msg is not None:
     55         print >> sys.stderr, msg
     56     print >> sys.stderr, __doc__
     57 
     58 def errprint(*args):
     59     sep = ""
     60     for arg in args:
     61         sys.stderr.write(sep + str(arg))
     62         sep = " "
     63     sys.stderr.write("\n")
     64 
     65 def main():
     66     import getopt
     67     global verbose, recurse, dryrun, makebackup
     68     try:
     69         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
     70                         ["dryrun", "recurse", "nobackup", "verbose", "help"])
     71     except getopt.error, msg:
     72         usage(msg)
     73         return
     74     for o, a in opts:
     75         if o in ('-d', '--dryrun'):
     76             dryrun += 1
     77         elif o in ('-r', '--recurse'):
     78             recurse += 1
     79         elif o in ('-n', '--nobackup'):
     80             makebackup = False
     81         elif o in ('-v', '--verbose'):
     82             verbose += 1
     83         elif o in ('-h', '--help'):
     84             usage()
     85             return
     86     if not args:
     87         r = Reindenter(sys.stdin)
     88         r.run()
     89         r.write(sys.stdout)
     90         return
     91     for arg in args:
     92         check(arg)
     93 
     94 def check(file):
     95     if os.path.isdir(file) and not os.path.islink(file):
     96         if verbose:
     97             print "listing directory", file
     98         names = os.listdir(file)
     99         for name in names:
    100             fullname = os.path.join(file, name)
    101             if ((recurse and os.path.isdir(fullname) and
    102                  not os.path.islink(fullname) and
    103                  not os.path.split(fullname)[1].startswith("."))
    104                 or name.lower().endswith(".py")):
    105                 check(fullname)
    106         return
    107 
    108     if verbose:
    109         print "checking", file, "...",
    110     try:
    111         f = open(file)
    112     except IOError, msg:
    113         errprint("%s: I/O Error: %s" % (file, str(msg)))
    114         return
    115 
    116     r = Reindenter(f)
    117     f.close()
    118     if r.run():
    119         if verbose:
    120             print "changed."
    121             if dryrun:
    122                 print "But this is a dry run, so leaving it alone."
    123         if not dryrun:
    124             bak = file + ".bak"
    125             if makebackup:
    126                 shutil.copyfile(file, bak)
    127                 if verbose:
    128                     print "backed up", file, "to", bak
    129             f = open(file, "w")
    130             r.write(f)
    131             f.close()
    132             if verbose:
    133                 print "wrote new", file
    134         return True
    135     else:
    136         if verbose:
    137             print "unchanged."
    138         return False
    139 
    140 def _rstrip(line, JUNK='\n \t'):
    141     """Return line stripped of trailing spaces, tabs, newlines.
    142 
    143     Note that line.rstrip() instead also strips sundry control characters,
    144     but at least one known Emacs user expects to keep junk like that, not
    145     mentioning Barry by name or anything <wink>.
    146     """
    147 
    148     i = len(line)
    149     while i > 0 and line[i-1] in JUNK:
    150         i -= 1
    151     return line[:i]
    152 
    153 class Reindenter:
    154 
    155     def __init__(self, f):
    156         self.find_stmt = 1  # next token begins a fresh stmt?

    157         self.level = 0      # current indent level

    158 
    159         # Raw file lines.

    160         self.raw = f.readlines()
    161 
    162         # File lines, rstripped & tab-expanded.  Dummy at start is so

    163         # that we can use tokenize's 1-based line numbering easily.

    164         # Note that a line is all-blank iff it's "\n".

    165         self.lines = [_rstrip(line).expandtabs() + "\n"
    166                       for line in self.raw]
    167         self.lines.insert(0, None)
    168         self.index = 1  # index into self.lines of next line

    169 
    170         # List of (lineno, indentlevel) pairs, one for each stmt and

    171         # comment line.  indentlevel is -1 for comment lines, as a

    172         # signal that tokenize doesn't know what to do about them;

    173         # indeed, they're our headache!

    174         self.stats = []
    175 
    176     def run(self):
    177         tokenize.tokenize(self.getline, self.tokeneater)
    178         # Remove trailing empty lines.

    179         lines = self.lines
    180         while lines and lines[-1] == "\n":
    181             lines.pop()
    182         # Sentinel.

    183         stats = self.stats
    184         stats.append((len(lines), 0))
    185         # Map count of leading spaces to # we want.

    186         have2want = {}
    187         # Program after transformation.

    188         after = self.after = []
    189         # Copy over initial empty lines -- there's nothing to do until

    190         # we see a line with *something* on it.

    191         i = stats[0][0]
    192         after.extend(lines[1:i])
    193         for i in range(len(stats)-1):
    194             thisstmt, thislevel = stats[i]
    195             nextstmt = stats[i+1][0]
    196             have = getlspace(lines[thisstmt])
    197             want = thislevel * 4
    198             if want < 0:
    199                 # A comment line.

    200                 if have:
    201                     # An indented comment line.  If we saw the same

    202                     # indentation before, reuse what it most recently

    203                     # mapped to.

    204                     want = have2want.get(have, -1)
    205                     if want < 0:
    206                         # Then it probably belongs to the next real stmt.

    207                         for j in xrange(i+1, len(stats)-1):
    208                             jline, jlevel = stats[j]
    209                             if jlevel >= 0:
    210                                 if have == getlspace(lines[jline]):
    211                                     want = jlevel * 4
    212                                 break
    213                     if want < 0:           # Maybe it's a hanging

    214                                            # comment like this one,

    215                         # in which case we should shift it like its base

    216                         # line got shifted.

    217                         for j in xrange(i-1, -1, -1):
    218                             jline, jlevel = stats[j]
    219                             if jlevel >= 0:
    220                                 want = have + getlspace(after[jline-1]) - \
    221                                        getlspace(lines[jline])
    222                                 break
    223                     if want < 0:
    224                         # Still no luck -- leave it alone.

    225                         want = have
    226                 else:
    227                     want = 0
    228             assert want >= 0
    229             have2want[have] = want
    230             diff = want - have
    231             if diff == 0 or have == 0:
    232                 after.extend(lines[thisstmt:nextstmt])
    233             else:
    234                 for line in lines[thisstmt:nextstmt]:
    235                     if diff > 0:
    236                         if line == "\n":
    237                             after.append(line)
    238                         else:
    239                             after.append(" " * diff + line)
    240                     else:
    241                         remove = min(getlspace(line), -diff)
    242                         after.append(line[remove:])
    243         return self.raw != self.after
    244 
    245     def write(self, f):
    246         f.writelines(self.after)
    247 
    248     # Line-getter for tokenize.

    249     def getline(self):
    250         if self.index >= len(self.lines):
    251             line = ""
    252         else:
    253             line = self.lines[self.index]
    254             self.index += 1
    255         return line
    256 
    257     # Line-eater for tokenize.

    258     def tokeneater(self, type, token, (sline, scol), end, line,
    259                    INDENT=tokenize.INDENT,
    260                    DEDENT=tokenize.DEDENT,
    261                    NEWLINE=tokenize.NEWLINE,
    262                    COMMENT=tokenize.COMMENT,
    263                    NL=tokenize.NL):
    264 
    265         if type == NEWLINE:
    266             # A program statement, or ENDMARKER, will eventually follow,

    267             # after some (possibly empty) run of tokens of the form

    268             #     (NL | COMMENT)* (INDENT | DEDENT+)?

    269             self.find_stmt = 1
    270 
    271         elif type == INDENT:
    272             self.find_stmt = 1
    273             self.level += 1
    274 
    275         elif type == DEDENT:
    276             self.find_stmt = 1
    277             self.level -= 1
    278 
    279         elif type == COMMENT:
    280             if self.find_stmt:
    281                 self.stats.append((sline, -1))
    282                 # but we're still looking for a new stmt, so leave

    283                 # find_stmt alone

    284 
    285         elif type == NL:
    286             pass
    287 
    288         elif self.find_stmt:
    289             # This is the first "real token" following a NEWLINE, so it

    290             # must be the first token of the next program statement, or an

    291             # ENDMARKER.

    292             self.find_stmt = 0
    293             if line:   # not endmarker

    294                 self.stats.append((sline, self.level))
    295 
    296 # Count number of leading blanks.

    297 def getlspace(line):
    298     i, n = 0, len(line)
    299     while i < n and line[i] == " ":
    300         i += 1
    301     return i
    302 
    303 if __name__ == '__main__':
    304     main()
    305