# (removed code-browser navigation header: "Home | History | Annotate | Download | only in scripts")
      1 #! /usr/bin/env python3
      2 
      3 # Released to the public domain, by Tim Peters, 03 October 2000.
      4 
      5 """reindent [-d][-r][-v] [ path ... ]
      6 
      7 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
      8 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
      9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
     10 -v (--verbose)  Verbose.   Print informative msgs; else no output.
     11    (--newline)  Newline.   Specify the newline character to use (CRLF, LF).
     12                            Default is the same as the original file.
     13 -h (--help)     Help.      Print this usage information and exit.
     14 
     15 Change Python (.py) files to use 4-space indents and no hard tab characters.
     16 Also trim excess spaces and tabs from ends of lines, and remove empty lines
     17 at the end of files.  Also ensure the last line ends with a newline.
     18 
     19 If no paths are given on the command line, reindent operates as a filter,
     20 reading a single source file from standard input and writing the transformed
     21 source to standard output.  In this case, the -d, -r and -v flags are
     22 ignored.
     23 
     24 You can pass one or more file and/or directory paths.  When a directory
     25 path, all .py files within the directory will be examined, and, if the -r
     26 option is given, likewise recursively for subdirectories.
     27 
     28 If output is not to standard output, reindent overwrites files in place,
     29 renaming the originals with a .bak extension.  If it finds nothing to
     30 change, the file is left alone.  If reindent does change a file, the changed
     31 file is a fixed-point for future runs (i.e., running reindent on the
     32 resulting .py file won't change it again).
     33 
     34 The hard part of reindenting is figuring out what to do with comment
     35 lines.  So long as the input files get a clean bill of health from
     36 tabnanny.py, reindent should do a good job.
     37 
     38 The backup file is a copy of the one that is being reindented. The ".bak"
     39 file is generated with shutil.copy(), but some corner cases regarding
     40 user/group and permissions could leave the backup file more readable than
     41 you'd prefer. You can always use the --nobackup option to prevent this.
     42 """
     43 
     44 __version__ = "1"
     45 
     46 import tokenize
     47 import os
     48 import shutil
     49 import sys
     50 
# Global option flags; defaults here, overridden by main() from the command line.
verbose = False      # -v/--verbose: print progress messages
recurse = False      # -r/--recurse: descend into subdirectories
dryrun = False       # -d/--dryrun: analyze only, never rewrite files
makebackup = True    # cleared by -n/--nobackup: skip the ".bak" copy
# A specified newline to be used in the output (set by --newline option)
spec_newline = None
     57 
     58 
     59 def usage(msg=None):
     60     if msg is None:
     61         msg = __doc__
     62     print(msg, file=sys.stderr)
     63 
     64 
     65 def errprint(*args):
     66     sys.stderr.write(" ".join(str(arg) for arg in args))
     67     sys.stderr.write("\n")
     68 
     69 def main():
     70     import getopt
     71     global verbose, recurse, dryrun, makebackup, spec_newline
     72     try:
     73         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
     74             ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
     75     except getopt.error as msg:
     76         usage(msg)
     77         return
     78     for o, a in opts:
     79         if o in ('-d', '--dryrun'):
     80             dryrun = True
     81         elif o in ('-r', '--recurse'):
     82             recurse = True
     83         elif o in ('-n', '--nobackup'):
     84             makebackup = False
     85         elif o in ('-v', '--verbose'):
     86             verbose = True
     87         elif o in ('--newline',):
     88             if not a.upper() in ('CRLF', 'LF'):
     89                 usage()
     90                 return
     91             spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()]
     92         elif o in ('-h', '--help'):
     93             usage()
     94             return
     95     if not args:
     96         r = Reindenter(sys.stdin)
     97         r.run()
     98         r.write(sys.stdout)
     99         return
    100     for arg in args:
    101         check(arg)
    102 
    103 
    104 def check(file):
    105     if os.path.isdir(file) and not os.path.islink(file):
    106         if verbose:
    107             print("listing directory", file)
    108         names = os.listdir(file)
    109         for name in names:
    110             fullname = os.path.join(file, name)
    111             if ((recurse and os.path.isdir(fullname) and
    112                  not os.path.islink(fullname) and
    113                  not os.path.split(fullname)[1].startswith("."))
    114                 or name.lower().endswith(".py")):
    115                 check(fullname)
    116         return
    117 
    118     if verbose:
    119         print("checking", file, "...", end=' ')
    120     with open(file, 'rb') as f:
    121         encoding, _ = tokenize.detect_encoding(f.readline)
    122     try:
    123         with open(file, encoding=encoding) as f:
    124             r = Reindenter(f)
    125     except IOError as msg:
    126         errprint("%s: I/O Error: %s" % (file, str(msg)))
    127         return
    128 
    129     newline = spec_newline if spec_newline else r.newlines
    130     if isinstance(newline, tuple):
    131         errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
    132         return
    133 
    134     if r.run():
    135         if verbose:
    136             print("changed.")
    137             if dryrun:
    138                 print("But this is a dry run, so leaving it alone.")
    139         if not dryrun:
    140             bak = file + ".bak"
    141             if makebackup:
    142                 shutil.copyfile(file, bak)
    143                 if verbose:
    144                     print("backed up", file, "to", bak)
    145             with open(file, "w", encoding=encoding, newline=newline) as f:
    146                 r.write(f)
    147             if verbose:
    148                 print("wrote new", file)
    149         return True
    150     else:
    151         if verbose:
    152             print("unchanged.")
    153         return False
    154 
    155 
    156 def _rstrip(line, JUNK='\n \t'):
    157     """Return line stripped of trailing spaces, tabs, newlines.
    158 
    159     Note that line.rstrip() instead also strips sundry control characters,
    160     but at least one known Emacs user expects to keep junk like that, not
    161     mentioning Barry by name or anything <wink>.
    162     """
    163 
    164     i = len(line)
    165     while i > 0 and line[i - 1] in JUNK:
    166         i -= 1
    167     return line[:i]
    168 
    169 
class Reindenter:
    """Reindent one Python source stream to 4-space indentation.

    Construct with a readable text-file object, call run() to compute the
    transformed source, then write() it to an output file object.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        #  create output without mutating the newlines.
        # NOTE: f.newlines is None until the file has been read, and a
        # tuple if the input mixed newline conventions.
        self.newlines = f.newlines

    def run(self):
        """Tokenize the input, build the reindented source in self.after.

        Returns True iff the transformed source differs from the input
        (i.e. the file would be changed).
        """
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Indenting: leave entirely-blank lines untouched.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Dedenting: never strip more than the leading blanks.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed source (self.after) to file object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next input line, or "" at end of input."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Track indent level and record a (lineno, level) stat for each
        statement start; comment lines are recorded with level -1."""

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
    318 
    319 
    320 # Count number of leading blanks.
    321 def getlspace(line):
    322     i, n = 0, len(line)
    323     while i < n and line[i] == " ":
    324         i += 1
    325     return i
    326 
    327 
# Script entry point: reindent the paths on the command line, or filter stdin.
if __name__ == '__main__':
    main()
    330