Home | History | Annotate | Download | only in scripts
      1 #! /usr/bin/env python
      2 
      3 # Released to the public domain, by Tim Peters, 03 October 2000.
      4 
      5 """reindent [-d][-r][-v] [ path ... ]
      6 
      7 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
      8 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
      9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
     10 -v (--verbose)  Verbose.   Print informative msgs; else no output.
     11 -h (--help)     Help.      Print this usage information and exit.
     12 
     13 Change Python (.py) files to use 4-space indents and no hard tab characters.
     14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
     15 at the end of files.  Also ensure the last line ends with a newline.
     16 
     17 If no paths are given on the command line, reindent operates as a filter,
     18 reading a single source file from standard input and writing the transformed
     19 source to standard output.  In this case, the -d, -r and -v flags are
     20 ignored.
     21 
     22 You can pass one or more file and/or directory paths.  When a directory
     23 path, all .py files within the directory will be examined, and, if the -r
     24 option is given, likewise recursively for subdirectories.
     25 
     26 If output is not to standard output, reindent overwrites files in place,
     27 renaming the originals with a .bak extension.  If it finds nothing to
     28 change, the file is left alone.  If reindent does change a file, the changed
     29 file is a fixed-point for future runs (i.e., running reindent on the
     30 resulting .py file won't change it again).
     31 
     32 The hard part of reindenting is figuring out what to do with comment
     33 lines.  So long as the input files get a clean bill of health from
     34 tabnanny.py, reindent should do a good job.
     35 
     36 The backup file is a copy of the one that is being reindented. The ".bak"
     37 file is generated with shutil.copy(), but some corner cases regarding
     38 user/group and permissions could leave the backup file more readable than
     39 you'd prefer. You can always use the --nobackup option to prevent this.
     40 """
     41 
     42 __version__ = "1"
     43 
     44 import tokenize
     45 import os, shutil
     46 import sys
     47 import io
     48 
     49 verbose    = 0
     50 recurse    = 0
     51 dryrun     = 0
     52 makebackup = True
     53 
     54 def usage(msg=None):
     55     if msg is not None:
     56         print >> sys.stderr, msg
     57     print >> sys.stderr, __doc__
     58 
     59 def errprint(*args):
     60     sep = ""
     61     for arg in args:
     62         sys.stderr.write(sep + str(arg))
     63         sep = " "
     64     sys.stderr.write("\n")
     65 
     66 def main():
     67     import getopt
     68     global verbose, recurse, dryrun, makebackup
     69     try:
     70         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
     71                         ["dryrun", "recurse", "nobackup", "verbose", "help"])
     72     except getopt.error, msg:
     73         usage(msg)
     74         return
     75     for o, a in opts:
     76         if o in ('-d', '--dryrun'):
     77             dryrun += 1
     78         elif o in ('-r', '--recurse'):
     79             recurse += 1
     80         elif o in ('-n', '--nobackup'):
     81             makebackup = False
     82         elif o in ('-v', '--verbose'):
     83             verbose += 1
     84         elif o in ('-h', '--help'):
     85             usage()
     86             return
     87     if not args:
     88         r = Reindenter(sys.stdin)
     89         r.run()
     90         r.write(sys.stdout)
     91         return
     92     for arg in args:
     93         check(arg)
     94 
     95 def check(file):
     96     if os.path.isdir(file) and not os.path.islink(file):
     97         if verbose:
     98             print "listing directory", file
     99         names = os.listdir(file)
    100         for name in names:
    101             fullname = os.path.join(file, name)
    102             if ((recurse and os.path.isdir(fullname) and
    103                  not os.path.islink(fullname) and
    104                  not os.path.split(fullname)[1].startswith("."))
    105                 or name.lower().endswith(".py")):
    106                 check(fullname)
    107         return
    108 
    109     if verbose:
    110         print "checking", file, "...",
    111     try:
    112         f = io.open(file)
    113     except IOError, msg:
    114         errprint("%s: I/O Error: %s" % (file, str(msg)))
    115         return
    116 
    117     r = Reindenter(f)
    118     f.close()
    119 
    120     newline = r.newlines
    121     if isinstance(newline, tuple):
    122         errprint("%s: mixed newlines detected; cannot process file" % file)
    123         return
    124 
    125     if r.run():
    126         if verbose:
    127             print "changed."
    128             if dryrun:
    129                 print "But this is a dry run, so leaving it alone."
    130         if not dryrun:
    131             bak = file + ".bak"
    132             if makebackup:
    133                 shutil.copyfile(file, bak)
    134                 if verbose:
    135                     print "backed up", file, "to", bak
    136             f = io.open(file, "w", newline=newline)
    137             r.write(f)
    138             f.close()
    139             if verbose:
    140                 print "wrote new", file
    141         return True
    142     else:
    143         if verbose:
    144             print "unchanged."
    145         return False
    146 
    147 def _rstrip(line, JUNK='\n \t'):
    148     """Return line stripped of trailing spaces, tabs, newlines.
    149 
    150     Note that line.rstrip() instead also strips sundry control characters,
    151     but at least one known Emacs user expects to keep junk like that, not
    152     mentioning Barry by name or anything <wink>.
    153     """
    154 
    155     i = len(line)
    156     while i > 0 and line[i-1] in JUNK:
    157         i -= 1
    158     return line[:i]
    159 
    160 class Reindenter:
    161 
    162     def __init__(self, f):
    163         self.find_stmt = 1  # next token begins a fresh stmt?
    164         self.level = 0      # current indent level
    165 
    166         # Raw file lines.
    167         self.raw = f.readlines()
    168 
    169         # File lines, rstripped & tab-expanded.  Dummy at start is so
    170         # that we can use tokenize's 1-based line numbering easily.
    171         # Note that a line is all-blank iff it's "\n".
    172         self.lines = [_rstrip(line).expandtabs() + "\n"
    173                       for line in self.raw]
    174         self.lines.insert(0, None)
    175         self.index = 1  # index into self.lines of next line
    176 
    177         # List of (lineno, indentlevel) pairs, one for each stmt and
    178         # comment line.  indentlevel is -1 for comment lines, as a
    179         # signal that tokenize doesn't know what to do about them;
    180         # indeed, they're our headache!
    181         self.stats = []
    182 
    183         # Save the newlines found in the file so they can be used to
    184         #  create output without mutating the newlines.
    185         self.newlines = f.newlines
    186 
    187     def run(self):
    188         tokenize.tokenize(self.getline, self.tokeneater)
    189         # Remove trailing empty lines.
    190         lines = self.lines
    191         while lines and lines[-1] == "\n":
    192             lines.pop()
    193         # Sentinel.
    194         stats = self.stats
    195         stats.append((len(lines), 0))
    196         # Map count of leading spaces to # we want.
    197         have2want = {}
    198         # Program after transformation.
    199         after = self.after = []
    200         # Copy over initial empty lines -- there's nothing to do until
    201         # we see a line with *something* on it.
    202         i = stats[0][0]
    203         after.extend(lines[1:i])
    204         for i in range(len(stats)-1):
    205             thisstmt, thislevel = stats[i]
    206             nextstmt = stats[i+1][0]
    207             have = getlspace(lines[thisstmt])
    208             want = thislevel * 4
    209             if want < 0:
    210                 # A comment line.
    211                 if have:
    212                     # An indented comment line.  If we saw the same
    213                     # indentation before, reuse what it most recently
    214                     # mapped to.
    215                     want = have2want.get(have, -1)
    216                     if want < 0:
    217                         # Then it probably belongs to the next real stmt.
    218                         for j in xrange(i+1, len(stats)-1):
    219                             jline, jlevel = stats[j]
    220                             if jlevel >= 0:
    221                                 if have == getlspace(lines[jline]):
    222                                     want = jlevel * 4
    223                                 break
    224                     if want < 0:           # Maybe it's a hanging
    225                                            # comment like this one,
    226                         # in which case we should shift it like its base
    227                         # line got shifted.
    228                         for j in xrange(i-1, -1, -1):
    229                             jline, jlevel = stats[j]
    230                             if jlevel >= 0:
    231                                 want = have + getlspace(after[jline-1]) - \
    232                                        getlspace(lines[jline])
    233                                 break
    234                     if want < 0:
    235                         # Still no luck -- leave it alone.
    236                         want = have
    237                 else:
    238                     want = 0
    239             assert want >= 0
    240             have2want[have] = want
    241             diff = want - have
    242             if diff == 0 or have == 0:
    243                 after.extend(lines[thisstmt:nextstmt])
    244             else:
    245                 for line in lines[thisstmt:nextstmt]:
    246                     if diff > 0:
    247                         if line == "\n":
    248                             after.append(line)
    249                         else:
    250                             after.append(" " * diff + line)
    251                     else:
    252                         remove = min(getlspace(line), -diff)
    253                         after.append(line[remove:])
    254         return self.raw != self.after
    255 
    256     def write(self, f):
    257         f.writelines(self.after)
    258 
    259     # Line-getter for tokenize.
    260     def getline(self):
    261         if self.index >= len(self.lines):
    262             line = ""
    263         else:
    264             line = self.lines[self.index]
    265             self.index += 1
    266         return line
    267 
    268     # Line-eater for tokenize.
    269     def tokeneater(self, type, token, (sline, scol), end, line,
    270                    INDENT=tokenize.INDENT,
    271                    DEDENT=tokenize.DEDENT,
    272                    NEWLINE=tokenize.NEWLINE,
    273                    COMMENT=tokenize.COMMENT,
    274                    NL=tokenize.NL):
    275 
    276         if type == NEWLINE:
    277             # A program statement, or ENDMARKER, will eventually follow,
    278             # after some (possibly empty) run of tokens of the form
    279             #     (NL | COMMENT)* (INDENT | DEDENT+)?
    280             self.find_stmt = 1
    281 
    282         elif type == INDENT:
    283             self.find_stmt = 1
    284             self.level += 1
    285 
    286         elif type == DEDENT:
    287             self.find_stmt = 1
    288             self.level -= 1
    289 
    290         elif type == COMMENT:
    291             if self.find_stmt:
    292                 self.stats.append((sline, -1))
    293                 # but we're still looking for a new stmt, so leave
    294                 # find_stmt alone
    295 
    296         elif type == NL:
    297             pass
    298 
    299         elif self.find_stmt:
    300             # This is the first "real token" following a NEWLINE, so it
    301             # must be the first token of the next program statement, or an
    302             # ENDMARKER.
    303             self.find_stmt = 0
    304             if line:   # not endmarker
    305                 self.stats.append((sline, self.level))
    306 
    307 # Count number of leading blanks.
    308 def getlspace(line):
    309     i, n = 0, len(line)
    310     while i < n and line[i] == " ":
    311         i += 1
    312     return i
    313 
    314 if __name__ == '__main__':
    315     main()
    316