#! /usr/bin/env python

# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
# programs.  When called as "pindent -c", it takes a valid Python
# program as input and outputs a version augmented with block-closing
# comments.  When called as "pindent -d", it assumes its input is a
# Python program with block-closing comments and outputs a commentless
# version.   When called as "pindent -r" it assumes its input is a
# Python program with block-closing comments but with its indentation
# messed up, and outputs a properly indented version.

# A "block-closing comment" is a comment of the form '# end <keyword>'
# where <keyword> is the keyword that opened the block.  If the
# opening keyword is 'def' or 'class', the function or class name may
# be repeated in the block-closing comment as well.  Here is an
# example of a program fully augmented with block-closing comments:

# def foobar(a, b):
#    if a == b:
#        a = a+1
#    elif a < b:
#        b = b-1
#        if b > a: a = a-1
#        # end if
#    else:
#        print 'oops!'
#    # end if
# # end def foobar

# Note that only the last part of an if...elif...else... block needs a
# block-closing comment; the same is true for other compound
# statements (e.g. try...except).  Also note that "short-form" blocks
# like the second 'if' in the example must be closed as well;
# otherwise the 'else' in the example would be ambiguous (remember
# that indentation is not significant when interpreting block-closing
# comments).

# The operations are idempotent (i.e. applied to their own output
# they yield an identical result).  Running first "pindent -c" and
# then "pindent -r" on a valid Python program produces a program that
# is semantically identical to the input (though its indentation may
# be different).  Running "pindent -d" on that output produces a
# program that only differs from the original in indentation.

# Other options:
# -s stepsize: set the indentation step size (default 8)
# -t tabsize : set the number of spaces a tab character is worth (default 8)
# -e         : expand TABs into spaces
# file ...   : input file(s) (default standard input)
# The results always go to standard output

# Caveats:
# - comments ending in a backslash will be mistaken for continued lines
# - continuations using backslash are always left unchanged
# - continuations inside parentheses are not extra indented by -r
#   but must be indented for -c to work correctly (this breaks
#   idempotency!)
# - continued lines inside triple-quoted strings are totally garbled

# Secret feature:
# - On input, a block may also be closed with an "end statement" --
#   this is a block-closing comment without the '#' sign.

# Possible improvements:
# - check syntax based on transitions in 'next' table
# - better error reporting
# - better error recovery
# - check identifier after class/def

# The following wishes need a more complete tokenization of the source:
# - Don't get fooled by comments ending in backslash
# - reindent continuation lines indicated by backslash
# - handle continuation lines inside parentheses/braces/brackets
# - handle triple quoted strings spanning lines
# - realign comments
# - optionally do much more thorough reformatting, a la C indent

from __future__ import print_function

import getopt
import io
import os
import re
import sys

# Defaults
STEPSIZE = 8
TABSIZE = 8
EXPANDTABS = False

# Transition table: for each block keyword, the keywords that may follow
# it within the same compound statement ('end' stands for a block-closing
# comment).  At present only membership in this table is consulted; the
# listed transitions themselves are not checked.
next = {}
next['if'] = next['elif'] = 'elif', 'else', 'end'
next['while'] = next['for'] = 'else', 'end'
next['try'] = 'except', 'finally'
next['except'] = 'except', 'else', 'finally', 'end'
next['else'] = next['finally'] = next['with'] = \
    next['def'] = next['class'] = 'end'
next['end'] = ()

# Keywords that open a new block (and therefore need a closing comment).
start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'

class PythonIndenter:
    """Add, delete, or re-indent by, block-closing comments.

    Lines are read from the input stream with readline() and results
    are written to the output stream with write().  One of complete(),
    delete() or reformat() processes the input up to end of file.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """Store the streams and layout parameters; compile the matchers.

        fpi        -- input stream (needs readline())
        fpo        -- output stream (needs write())
        indentsize -- columns per nesting level, used by reformat()
        tabsize    -- number of columns a TAB character is worth
        expandtabs -- if true, TABs are expanded to spaces on output
        """
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # A line starting with a lowercase word, optionally followed by an
        # identifier.  Backslash-newline pairs are accepted as whitespace.
        self.kwprog = re.compile(
                r'^(?:\s|\\\n)*(?P<kw>[a-z]+)'
                r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # A block-closing comment ('# end <kw> [<id>]'); the '#' is optional.
        self.endprog = re.compile(
                r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading blanks and TABs of a line.
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write *line* to the output, expanding TABs if so configured."""
        if self.expandtabs:
            self._write(line.expandtabs(self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Read one physical line; count it unless it is empty (EOF)."""
        line = self.fpi.readline()
        if line: self.lineno += 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report a problem on stderr and as a '### ... ###' output line.

        If *args* are given, *fmt* is %-formatted with them first.
        """
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Read one logical line, joining backslash-continued lines.

        Returns the empty string at end of file.
        """
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line += line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent):
        """Write *line* re-indented to nesting level *indent*.

        The existing leading whitespace is dropped.  Lines that are empty
        after stripping get no indentation at all.
        """
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = self.wsprog.match(line).end()
        line = line[i:]
        if line[:1] not in ('\n', '\r', ''):
            line = '\t'*tabs + ' '*spaces + line
        # end if
        self.write(line)
    # end def putline

    def reformat(self):
        """Re-indent the input according to its block-closing comments.

        The existing indentation is ignored.  Problems are reported via
        error(); keywords still open at end of file are listed in the
        output.
        """
        # One (opening keyword, most recent keyword) pair per open block.
        stack = []
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack.pop()[0] != kw2:
                    self.error('unmatched end')
                # end if
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                if kw in next and stack:
                    # A continuation keyword (else, except, ...) lines up
                    # with the statement that opened the block.
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output, dropping block-closing comments.

        A warning is written to stderr when the number of dropped
        comments differs from the number of block-opening keywords seen.
        """
        begin_counter = 0
        end_counter = 0
        while True:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter += 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter += 1
                # end if
            # end if
            self.write(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def _closing_comment(self, kw, ident):
        """Return the block-closing comment line for *kw* (and *ident*)."""
        if ident:
            return '# end %s %s\n' % (kw, ident)
        # end if
        return '# end %s\n' % kw
    # end def _closing_comment

    def complete(self):
        """Copy the input to the output, adding block-closing comments.

        Block structure is derived from the indentation of the input.
        Closing comments that are already present are kept (a keyword
        mismatch is reported via error()), not duplicated.
        """
        # Saved (currentws, firstkw, lastkw, topid) of the enclosing levels.
        stack = []
        # Blank and comment-only lines held back so that closing comments
        # are emitted before them.
        todo = []
        # currentws: leading whitespace of the current level
        # firstkw/lastkw: opening / latest keyword of the statement that is
        #     still open at the current level ('' if none)
        # topid: name following 'def'/'class' for that statement, if any
        currentws = thisid = firstkw = lastkw = topid = ''
        while True:
            line = self.getline()
            i = self.wsprog.match(line).end()
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    if thiskw not in next:
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    todo.append(line)
                    continue
                else:
                    thiskw = ''
                # end if
            # end if
            indentws = line[:i]
            indent = len(indentws.expandtabs(self.tabsize))
            current = len(currentws.expandtabs(self.tabsize))
            # Dedent: close every level that is deeper than this line.
            # At EOF the line is empty, so all open levels get closed.
            while indent < current:
                if firstkw:
                    self.write(currentws +
                               self._closing_comment(firstkw, topid))
                    firstkw = lastkw = ''
                # end if
                currentws, firstkw, lastkw, topid = stack.pop()
                current = len(currentws.expandtabs(self.tabsize))
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    # A new statement at the same level ends the open one.
                    self.write(currentws +
                               self._closing_comment(firstkw, topid))
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                stack.append((currentws, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                currentws, firstkw, lastkw, topid = \
                          indentws, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete
# end class PythonIndenter

# Simplified user interface
# - xxx_filter(input, output): read and write file objects
# - xxx_string(s): take and return string object
# - xxx_file(filename): process file in place, return true iff changed

def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from *input*; write it with closing comments added."""
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    pi.complete()
# end def complete_filter

def delete_filter(input= sys.stdin, output = sys.stdout,
                        stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from *input*; write it with closing comments removed."""
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    pi.delete()
# end def delete_filter

def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from *input*; write it re-indented by its closing comments."""
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    pi.reformat()
# end def reformat_filter

def _string_buffers(source):
    """Return (input, output) in-memory streams matching the type of *source*.

    Byte strings (the native str of Python 2) use BytesIO; text strings
    use StringIO, since BytesIO rejects them.
    """
    if isinstance(source, bytes):
        return io.BytesIO(source), io.BytesIO()
    # end if
    return io.StringIO(source), io.StringIO()
# end def _string_buffers

def _process_string(operation, source, stepsize, tabsize, expandtabs):
    """Run the PythonIndenter method named *operation* on *source*."""
    input, output = _string_buffers(source)
    pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    getattr(pi, operation)()
    return output.getvalue()
# end def _process_string

def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return *source* with block-closing comments added."""
    return _process_string('complete', source, stepsize, tabsize, expandtabs)
# end def complete_string

def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return *source* with block-closing comments removed."""
    return _process_string('delete', source, stepsize, tabsize, expandtabs)
# end def delete_string

def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return *source* re-indented according to its block-closing comments."""
    return _process_string('reformat', source, stepsize, tabsize, expandtabs)
# end def reformat_string

def make_backup(filename):
    """Rename *filename* to *filename*~, replacing an older backup.

    This is best effort: failures are reported on stderr and otherwise
    ignored.
    """
    backup = filename + '~'
    if os.path.lexists(backup):
        try:
            os.remove(backup)
        except OSError:
            print("Can't remove backup %r" % (backup,), file=sys.stderr)
        # end try
    # end if
    try:
        os.rename(filename, backup)
    except OSError:
        print("Can't rename %r to %r" % (filename, backup), file=sys.stderr)
    # end try
# end def make_backup

def _process_file(transform, filename, stepsize, tabsize, expandtabs):
    """Apply the string function *transform* to a file, in place.

    Returns 0 if the file content is unchanged (the file is then left
    untouched), else 1 after backing up and rewriting the file.
    """
    with open(filename, 'r') as f:
        source = f.read()
    # end with
    result = transform(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    make_backup(filename)
    with open(filename, 'w') as f:
        f.write(result)
    # end with
    return 1
# end def _process_file

def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add block-closing comments to a file in place; return 1 iff changed."""
    return _process_file(complete_string, filename,
                         stepsize, tabsize, expandtabs)
# end def complete_file

def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove block-closing comments from a file in place; return 1 iff changed."""
    return _process_file(delete_string, filename,
                         stepsize, tabsize, expandtabs)
# end def delete_file

def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent a file in place by its closing comments; return 1 iff changed."""
    return _process_file(reformat_string, filename,
                         stepsize, tabsize, expandtabs)
# end def reformat_file

# Test program when called as a script

usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c         : complete a correctly indented program (add #end directives)
-d         : delete #end directives
-r         : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e         : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()

def error_both(op1, op2):
    """Complain about two conflicting action options and exit with status 2.

    op1 -- the option just seen, e.g. '-d'
    op2 -- the name of the action selected earlier, e.g. 'complete'
    """
    sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
    sys.stderr.write(usage)
    sys.exit(2)
# end def error_both

def test():
    """Command-line entry point: parse sys.argv and run the chosen action.

    Exits with status 2 on a usage error.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    except getopt.error as msg:
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if o == '-c':
            if action: error_both(o, action)
            # end if
            action = 'complete'
        elif o == '-d':
            if action: error_both(o, action)
            # end if
            action = 'delete'
        elif o == '-r':
            if action: error_both(o, action)
            # end if
            action = 'reformat'
        elif o == '-s':
            stepsize = int(a)
        elif o == '-t':
            tabsize = int(a)
        elif o == '-e':
            expandtabs = True
        # end if
    # end for
    if not action:
        sys.stderr.write(
                'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    # Explicit dispatch tables, built at call time, instead of eval().
    if not args or args == ['-']:
        filters = {'complete': complete_filter,
                   'delete': delete_filter,
                   'reformat': reformat_filter}
        action = filters[action]
        action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        file_actions = {'complete': complete_file,
                        'delete': delete_file,
                        'reformat': reformat_file}
        action = file_actions[action]
        for filename in args:
            action(filename, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def test

# test() is the command-line driver, not a unit test: keep test
# collectors that match on the name from calling it.
test.__test__ = False

if __name__ == '__main__':
    test()
# end if