Home | History | Annotate | Download | only in utils
      1 #! /usr/bin/env python
      2 
      3 # Released to the public domain, by Tim Peters, 03 October 2000.
      4 
      5 """reindent [-d][-r][-v] [ path ... ]
      6 
      7 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
      8 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
      9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
     10 -v (--verbose)  Verbose.   Print informative msgs; else no output.
     11 -h (--help)     Help.      Print this usage information and exit.
     12 
     13 Change Python (.py) files to use 4-space indents and no hard tab characters.
     14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
     15 at the end of files.  Also ensure the last line ends with a newline.
     16 
     17 If no paths are given on the command line, reindent operates as a filter,
     18 reading a single source file from standard input and writing the transformed
     19 source to standard output.  In this case, the -d, -r and -v flags are
     20 ignored.
     21 
You can pass one or more file and/or directory paths.  When a path is a
directory, all .py files within the directory will be examined, and, if the
-r option is given, likewise recursively for subdirectories.
     25 
     26 If output is not to standard output, reindent overwrites files in place,
     27 renaming the originals with a .bak extension.  If it finds nothing to
     28 change, the file is left alone.  If reindent does change a file, the changed
     29 file is a fixed-point for future runs (i.e., running reindent on the
     30 resulting .py file won't change it again).
     31 
     32 The hard part of reindenting is figuring out what to do with comment
     33 lines.  So long as the input files get a clean bill of health from
     34 tabnanny.py, reindent should do a good job.
     35 
     36 The backup file is a copy of the one that is being reindented. The ".bak"
     37 file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
     40 """
     41 
     42 __version__ = "1"
     43 
     44 import tokenize
     45 import os, shutil
     46 import sys
     47 
# Command-line state.  main() updates these from the parsed options and
# check() reads them.
verbose    = 0     # incremented per -v/--verbose; nonzero enables progress messages
recurse    = 0     # incremented per -r/--recurse; nonzero lets check() descend into subdirectories
dryrun     = 0     # incremented per -d/--dryrun; nonzero means files are analyzed but never rewritten
makebackup = True  # set to False by -n/--nobackup; when true check() copies the file to ".bak" first
     52 
     53 def usage(msg=None):
     54     if msg is not None:
     55         print >> sys.stderr, msg
     56     print >> sys.stderr, __doc__
     57 
     58 def errprint(*args):
     59     sep = ""
     60     for arg in args:
     61         sys.stderr.write(sep + str(arg))
     62         sep = " "
     63     sys.stderr.write("\n")
     64 
     65 def main():
     66     import getopt
     67     global verbose, recurse, dryrun, makebackup
     68     try:
     69         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
     70                         ["dryrun", "recurse", "nobackup", "verbose", "help"])
     71     except getopt.error, msg:
     72         usage(msg)
     73         return
     74     for o, a in opts:
     75         if o in ('-d', '--dryrun'):
     76             dryrun += 1
     77         elif o in ('-r', '--recurse'):
     78             recurse += 1
     79         elif o in ('-n', '--nobackup'):
     80             makebackup = False
     81         elif o in ('-v', '--verbose'):
     82             verbose += 1
     83         elif o in ('-h', '--help'):
     84             usage()
     85             return
     86     if not args:
     87         r = Reindenter(sys.stdin)
     88         r.run()
     89         r.write(sys.stdout)
     90         return
     91     for arg in args:
     92         check(arg)
     93 
     94 def check(file):
     95     if os.path.isdir(file) and not os.path.islink(file):
     96         if verbose:
     97             print "listing directory", file
     98         names = os.listdir(file)
     99         for name in names:
    100             fullname = os.path.join(file, name)
    101             if ((recurse and os.path.isdir(fullname) and
    102                  not os.path.islink(fullname))
    103                 or name.lower().endswith(".py")):
    104                 check(fullname)
    105         return
    106 
    107     if verbose:
    108         print "checking", file, "...",
    109     try:
    110         f = open(file)
    111     except IOError, msg:
    112         errprint("%s: I/O Error: %s" % (file, str(msg)))
    113         return
    114 
    115     r = Reindenter(f)
    116     f.close()
    117     if r.run():
    118         if verbose:
    119             print "changed."
    120             if dryrun:
    121                 print "But this is a dry run, so leaving it alone."
    122         if not dryrun:
    123             bak = file + ".bak"
    124             if makebackup:
    125                 shutil.copyfile(file, bak)
    126                 if verbose:
    127                     print "backed up", file, "to", bak
    128             f = open(file, "w")
    129             r.write(f)
    130             f.close()
    131             if verbose:
    132                 print "wrote new", file
    133         return True
    134     else:
    135         if verbose:
    136             print "unchanged."
    137         return False
    138 
    139 def _rstrip(line, JUNK='\n \t'):
    140     """Return line stripped of trailing spaces, tabs, newlines.
    141 
    142     Note that line.rstrip() instead also strips sundry control characters,
    143     but at least one known Emacs user expects to keep junk like that, not
    144     mentioning Barry by name or anything <wink>.
    145     """
    146 
    147     i = len(line)
    148     while i > 0 and line[i-1] in JUNK:
    149         i -= 1
    150     return line[:i]
    151 
    152 class Reindenter:
    153 
    154     def __init__(self, f):
    155         self.find_stmt = 1  # next token begins a fresh stmt?
    156         self.level = 0      # current indent level
    157 
    158         # Raw file lines.
    159         self.raw = f.readlines()
    160 
    161         # File lines, rstripped & tab-expanded.  Dummy at start is so
    162         # that we can use tokenize's 1-based line numbering easily.
    163         # Note that a line is all-blank iff it's "\n".
    164         self.lines = [_rstrip(line).expandtabs() + "\n"
    165                       for line in self.raw]
    166         self.lines.insert(0, None)
    167         self.index = 1  # index into self.lines of next line
    168 
    169         # List of (lineno, indentlevel) pairs, one for each stmt and
    170         # comment line.  indentlevel is -1 for comment lines, as a
    171         # signal that tokenize doesn't know what to do about them;
    172         # indeed, they're our headache!
    173         self.stats = []
    174 
    175     def run(self):
    176         tokenize.tokenize(self.getline, self.tokeneater)
    177         # Remove trailing empty lines.
    178         lines = self.lines
    179         while lines and lines[-1] == "\n":
    180             lines.pop()
    181         # Sentinel.
    182         stats = self.stats
    183         stats.append((len(lines), 0))
    184         # Map count of leading spaces to # we want.
    185         have2want = {}
    186         # Program after transformation.
    187         after = self.after = []
    188         # Copy over initial empty lines -- there's nothing to do until
    189         # we see a line with *something* on it.
    190         i = stats[0][0]
    191         after.extend(lines[1:i])
    192         for i in range(len(stats)-1):
    193             thisstmt, thislevel = stats[i]
    194             nextstmt = stats[i+1][0]
    195             have = getlspace(lines[thisstmt])
    196             want = thislevel * 4
    197             if want < 0:
    198                 # A comment line.
    199                 if have:
    200                     # An indented comment line.  If we saw the same
    201                     # indentation before, reuse what it most recently
    202                     # mapped to.
    203                     want = have2want.get(have, -1)
    204                     if want < 0:
    205                         # Then it probably belongs to the next real stmt.
    206                         for j in xrange(i+1, len(stats)-1):
    207                             jline, jlevel = stats[j]
    208                             if jlevel >= 0:
    209                                 if have == getlspace(lines[jline]):
    210                                     want = jlevel * 4
    211                                 break
    212                     if want < 0:           # Maybe it's a hanging
    213                                            # comment like this one,
    214                         # in which case we should shift it like its base
    215                         # line got shifted.
    216                         for j in xrange(i-1, -1, -1):
    217                             jline, jlevel = stats[j]
    218                             if jlevel >= 0:
    219                                 want = have + getlspace(after[jline-1]) - \
    220                                        getlspace(lines[jline])
    221                                 break
    222                     if want < 0:
    223                         # Still no luck -- leave it alone.
    224                         want = have
    225                 else:
    226                     want = 0
    227             assert want >= 0
    228             have2want[have] = want
    229             diff = want - have
    230             if diff == 0 or have == 0:
    231                 after.extend(lines[thisstmt:nextstmt])
    232             else:
    233                 for line in lines[thisstmt:nextstmt]:
    234                     if diff > 0:
    235                         if line == "\n":
    236                             after.append(line)
    237                         else:
    238                             after.append(" " * diff + line)
    239                     else:
    240                         remove = min(getlspace(line), -diff)
    241                         after.append(line[remove:])
    242         return self.raw != self.after
    243 
    244     def write(self, f):
    245         f.writelines(self.after)
    246 
    247     # Line-getter for tokenize.
    248     def getline(self):
    249         if self.index >= len(self.lines):
    250             line = ""
    251         else:
    252             line = self.lines[self.index]
    253             self.index += 1
    254         return line
    255 
    256     # Line-eater for tokenize.
    257     def tokeneater(self, type, token, (sline, scol), end, line,
    258                    INDENT=tokenize.INDENT,
    259                    DEDENT=tokenize.DEDENT,
    260                    NEWLINE=tokenize.NEWLINE,
    261                    COMMENT=tokenize.COMMENT,
    262                    NL=tokenize.NL):
    263 
    264         if type == NEWLINE:
    265             # A program statement, or ENDMARKER, will eventually follow,
    266             # after some (possibly empty) run of tokens of the form
    267             #     (NL | COMMENT)* (INDENT | DEDENT+)?
    268             self.find_stmt = 1
    269 
    270         elif type == INDENT:
    271             self.find_stmt = 1
    272             self.level += 1
    273 
    274         elif type == DEDENT:
    275             self.find_stmt = 1
    276             self.level -= 1
    277 
    278         elif type == COMMENT:
    279             if self.find_stmt:
    280                 self.stats.append((sline, -1))
    281                 # but we're still looking for a new stmt, so leave
    282                 # find_stmt alone
    283 
    284         elif type == NL:
    285             pass
    286 
    287         elif self.find_stmt:
    288             # This is the first "real token" following a NEWLINE, so it
    289             # must be the first token of the next program statement, or an
    290             # ENDMARKER.
    291             self.find_stmt = 0
    292             if line:   # not endmarker
    293                 self.stats.append((sline, self.level))
    294 
    295 # Count number of leading blanks.
    296 def getlspace(line):
    297     i, n = 0, len(line)
    298     while i < n and line[i] == " ":
    299         i += 1
    300     return i
    301 
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
    304