#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-n][-v][-h] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files.  Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths.  When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension.  If it finds nothing to
change, the file is left alone.  If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""

__version__ = "1"

import tokenize
import os
import shutil
import sys

# Option state.  main() updates these from the command line; check() reads
# them.
verbose = 0
recurse = 0
dryrun = 0
makebackup = True


def usage(msg=None):
    """Write the optional message `msg`, then the module docstring, to stderr."""
    if msg is not None:
        errprint(msg)
    errprint(__doc__)


def errprint(*args):
    """Write str() of each argument to stderr, space-separated, plus a newline."""
    sep = ""
    for arg in args:
        sys.stderr.write(sep + str(arg))
        sep = " "
    sys.stderr.write("\n")


def main():
    """Parse sys.argv and reindent the named paths (or filter stdin -> stdout).

    On a bad option the getopt error and the usage text are written to
    stderr and the function returns without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
                        ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: no paths given, so transform stdin to stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)


def check(file):
    """Reindent the file `file`, or the .py files inside the directory `file`.

    For a directory (symlinks to directories are not followed), every entry
    whose name ends in ".py" (case-insensitively) is checked; when the
    global `recurse` is set, subdirectories whose names do not start with
    "." are descended into as well.

    Returns True if the file needed changes (it is rewritten in place
    unless `dryrun` is set, after copying it to file + ".bak" when
    `makebackup` is set), False if it was already clean, and None for a
    directory or when the file could not be opened.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") is appended
        # to this same output line below.
        sys.stdout.write("checking %s ... " % file)
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; `with` closes the
    # handle even if that read raises.
    with f:
        r = Reindenter(f)
    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            if makebackup:
                bak = file + ".bak"
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            with open(file, "w") as f:
                r.write(f)
            if verbose:
                print("wrote new %s" % file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False


def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Note that line.rstrip() instead also strips sundry control characters,
    but at least one known Emacs user expects to keep junk like that, not
    mentioning Barry by name or anything <wink>.
    """

    i = len(line)
    while i > 0 and line[i-1] in JUNK:
        i -= 1
    return line[:i]


class Reindenter:
    """Compute a 4-space-indented version of one Python source file.

    Construct with a file-like object (only readlines() is used), call
    run() to compute the transformed lines, then write() to emit them.
    """

    def __init__(self, f):
        """Read all lines from `f` and prepare the tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented lines into self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.  Errors raised by the tokenize module on input
        it cannot tokenize are not caught here.
        """
        # generate_tokens() is the iterator form of the tokenizer and is
        # available on both Python 2 and Python 3.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file-like object `f`."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once they are exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, recording statement and comment start lines.

        `start` is the token's (row, column) start position; only the row
        is used.  Statement starts are appended to self.stats with the
        current indent level, comment lines with level -1.
        """
        sline = start[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))


# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters in `line`."""
    i, n = 0, len(line)
    while i < n and line[i] == " ":
        i += 1
    return i


if __name__ == '__main__':
    main()