#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-n][-v][-h] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files.  Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths.  When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension.  If it finds nothing to
change, the file is left alone.  If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""

__version__ = "1"

import tokenize
import os
import shutil
import sys
import io

# Command-line state, set by main() and read by check().
verbose = 0
recurse = 0
dryrun = 0
makebackup = True


def usage(msg=None):
    """Write the optional message `msg`, then the module docstring, to stderr."""
    if msg is not None:
        errprint(msg)
    errprint(__doc__)


def errprint(*args):
    """Write the str() of each argument to stderr, space-separated, plus a newline."""
    sys.stderr.write(" ".join(str(arg) for arg in args))
    sys.stderr.write("\n")


def main():
    """Parse sys.argv and reindent the named paths, or filter stdin to stdout.

    An unrecognised option prints the getopt message and the usage text to
    stderr and returns without processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
                        ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: no paths were given, so transform stdin to stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)


def check(file):
    """Reindent one file in place, or walk one directory.

    For a directory (that is not a symlink), every entry whose name ends in
    ".py" is checked; with the recurse flag set, non-hidden, non-symlink
    subdirectories are descended into as well.  Returns None in that case.

    For a file, returns True if it needed changes (it is rewritten unless
    this is a dry run), False if it was already clean, and None if it could
    not be opened or uses mixed newline conventions.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") follows on
        # the same output line.
        sys.stdout.write("checking %s ... " % file)
    try:
        f = io.open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The context manager closes the handle even if reading fails.
    with f:
        r = Reindenter(f)

    # A tuple here means the file object saw more than one kind of line
    # ending while reading.
    newline = r.newlines
    if isinstance(newline, tuple):
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            # Write back using the newline convention recorded on input.
            with io.open(file, "w", newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new %s" % file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False


def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Note that line.rstrip() instead also strips sundry control characters,
    but at least one known Emacs user expects to keep junk like that, not
    mentioning Barry by name or anything <wink>.
    """
    # Passing the character set explicitly strips only those characters.
    return line.rstrip(JUNK)


class Reindenter:
    """Reads a source file and computes its 4-space-indented equivalent.

    Construct with a file-like object providing readlines() and a
    `newlines` attribute, call run() to do the work, then write() to emit
    the transformed lines.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        # create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Compute the reindented lines into self.after.

        Returns True if the result differs from the raw input lines.
        """
        # generate_tokens() is available on both Python 2 and Python 3,
        # unlike the callback form tokenize.tokenize(readline, tokeneater).
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline - 1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never strip more than the leading blanks present.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines (computed by run()) to file object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once the lines are exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        `slinecol` is the token's (row, column) start position; only the
        row is used.  Appends (lineno, level) to self.stats for the first
        token of each statement, and (lineno, -1) for each comment seen
        while a new statement is still being looked for.
        """
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))


# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters (tabs do not count)."""
    return len(line) - len(line.lstrip(" "))


if __name__ == '__main__':
    main()