#! /usr/bin/env python

      2 
# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
# programs.  When called as "pindent -c", it takes a valid Python
# program as input and outputs a version augmented with block-closing
# comments.  When called as "pindent -d", it assumes its input is a
# Python program with block-closing comments and outputs a commentless
# version.   When called as "pindent -r" it assumes its input is a
# Python program with block-closing comments but with its indentation
# messed up, and outputs a properly indented version.

     12 
     13 # A "block-closing comment" is a comment of the form '# end <keyword>'

     14 # where <keyword> is the keyword that opened the block.  If the

     15 # opening keyword is 'def' or 'class', the function or class name may

     16 # be repeated in the block-closing comment as well.  Here is an

     17 # example of a program fully augmented with block-closing comments:

     18 
     19 # def foobar(a, b):

     20 #    if a == b:

     21 #        a = a+1

     22 #    elif a < b:

     23 #        b = b-1

     24 #        if b > a: a = a-1

     25 #        # end if

     26 #    else:

     27 #        print 'oops!'

     28 #    # end if

     29 # # end def foobar

     30 
     31 # Note that only the last part of an if...elif...else... block needs a

     32 # block-closing comment; the same is true for other compound

     33 # statements (e.g. try...except).  Also note that "short-form" blocks

     34 # like the second 'if' in the example must be closed as well;

     35 # otherwise the 'else' in the example would be ambiguous (remember

     36 # that indentation is not significant when interpreting block-closing

     37 # comments).

     38 
# The operations are idempotent (i.e. applied to their own output
# they yield an identical result).  Running first "pindent -c" and
# then "pindent -r" on a valid Python program produces a program that
# is semantically identical to the input (though its indentation may
# be different). Running "pindent -d" on that output produces a
# program that only differs from the original in indentation.

     45 
# Other options:
# -s stepsize: set the indentation step size (default 8)
# -t tabsize : set the number of spaces a tab character is worth (default 8)
# -e         : expand TABs into spaces
# file ...   : file(s) to rewrite in place, keeping a backup in file~
# Without file arguments (or with a single "-") the program acts as a
# filter: it reads standard input and writes standard output.

     52 
     53 # Caveats:

     54 # - comments ending in a backslash will be mistaken for continued lines

     55 # - continuations using backslash are always left unchanged

     56 # - continuations inside parentheses are not extra indented by -r

     57 #   but must be indented for -c to work correctly (this breaks

     58 #   idempotency!)

     59 # - continued lines inside triple-quoted strings are totally garbled

     60 
     61 # Secret feature:

     62 # - On input, a block may also be closed with an "end statement" --

     63 #   this is a block-closing comment without the '#' sign.

     64 
     65 # Possible improvements:

     66 # - check syntax based on transitions in 'next' table

     67 # - better error reporting

     68 # - better error recovery

     69 # - check identifier after class/def

     70 
     71 # The following wishes need a more complete tokenization of the source:

     72 # - Don't get fooled by comments ending in backslash

     73 # - reindent continuation lines indicated by backslash

     74 # - handle continuation lines inside parentheses/braces/brackets

     75 # - handle triple quoted strings spanning lines

     76 # - realign comments

     77 # - optionally do much more thorough reformatting, a la C indent

     78 
     79 # Defaults

     80 STEPSIZE = 8
     81 TABSIZE = 8
     82 EXPANDTABS = 0
     83 
     84 import re
     85 import sys
     86 
     87 next = {}
     88 next['if'] = next['elif'] = 'elif', 'else', 'end'
     89 next['while'] = next['for'] = 'else', 'end'
     90 next['try'] = 'except', 'finally'
     91 next['except'] = 'except', 'else', 'finally', 'end'
     92 next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
     93 next['end'] = ()
     94 start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'
     95 
     96 class PythonIndenter:
     97 
     98     def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
     99                  indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    100         self.fpi = fpi
    101         self.fpo = fpo
    102         self.indentsize = indentsize
    103         self.tabsize = tabsize
    104         self.lineno = 0
    105         self.expandtabs = expandtabs
    106         self._write = fpo.write
    107         self.kwprog = re.compile(
    108                 r'^\s*(?P<kw>[a-z]+)'
    109                 r'(\s+(?P<id>[a-zA-Z_]\w*))?'
    110                 r'[^\w]')
    111         self.endprog = re.compile(
    112                 r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
    113                 r'(\s+(?P<id>[a-zA-Z_]\w*))?'
    114                 r'[^\w]')
    115         self.wsprog = re.compile(r'^[ \t]*')
    116     # end def __init__

    117 
    118     def write(self, line):
    119         if self.expandtabs:
    120             self._write(line.expandtabs(self.tabsize))
    121         else:
    122             self._write(line)
    123         # end if

    124     # end def write

    125 
    126     def readline(self):
    127         line = self.fpi.readline()
    128         if line: self.lineno = self.lineno + 1
    129         # end if

    130         return line
    131     # end def readline

    132 
    133     def error(self, fmt, *args):
    134         if args: fmt = fmt % args
    135         # end if

    136         sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
    137         self.write('### %s ###\n' % fmt)
    138     # end def error

    139 
    140     def getline(self):
    141         line = self.readline()
    142         while line[-2:] == '\\\n':
    143             line2 = self.readline()
    144             if not line2: break
    145             # end if

    146             line = line + line2
    147         # end while

    148         return line
    149     # end def getline

    150 
    151     def putline(self, line, indent = None):
    152         if indent is None:
    153             self.write(line)
    154             return
    155         # end if

    156         tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
    157         i = 0
    158         m = self.wsprog.match(line)
    159         if m: i = m.end()
    160         # end if

    161         self.write('\t'*tabs + ' '*spaces + line[i:])
    162     # end def putline

    163 
    164     def reformat(self):
    165         stack = []
    166         while 1:
    167             line = self.getline()
    168             if not line: break      # EOF

    169             # end if

    170             m = self.endprog.match(line)
    171             if m:
    172                 kw = 'end'
    173                 kw2 = m.group('kw')
    174                 if not stack:
    175                     self.error('unexpected end')
    176                 elif stack[-1][0] != kw2:
    177                     self.error('unmatched end')
    178                 # end if

    179                 del stack[-1:]
    180                 self.putline(line, len(stack))
    181                 continue
    182             # end if

    183             m = self.kwprog.match(line)
    184             if m:
    185                 kw = m.group('kw')
    186                 if kw in start:
    187                     self.putline(line, len(stack))
    188                     stack.append((kw, kw))
    189                     continue
    190                 # end if

    191                 if next.has_key(kw) and stack:
    192                     self.putline(line, len(stack)-1)
    193                     kwa, kwb = stack[-1]
    194                     stack[-1] = kwa, kw
    195                     continue
    196                 # end if

    197             # end if

    198             self.putline(line, len(stack))
    199         # end while

    200         if stack:
    201             self.error('unterminated keywords')
    202             for kwa, kwb in stack:
    203                 self.write('\t%s\n' % kwa)
    204             # end for

    205         # end if

    206     # end def reformat

    207 
    208     def delete(self):
    209         begin_counter = 0
    210         end_counter = 0
    211         while 1:
    212             line = self.getline()
    213             if not line: break      # EOF

    214             # end if

    215             m = self.endprog.match(line)
    216             if m:
    217                 end_counter = end_counter + 1
    218                 continue
    219             # end if

    220             m = self.kwprog.match(line)
    221             if m:
    222                 kw = m.group('kw')
    223                 if kw in start:
    224                     begin_counter = begin_counter + 1
    225                 # end if

    226             # end if

    227             self.putline(line)
    228         # end while

    229         if begin_counter - end_counter < 0:
    230             sys.stderr.write('Warning: input contained more end tags than expected\n')
    231         elif begin_counter - end_counter > 0:
    232             sys.stderr.write('Warning: input contained less end tags than expected\n')
    233         # end if

    234     # end def delete

    235 
    236     def complete(self):
    237         self.indentsize = 1
    238         stack = []
    239         todo = []
    240         thisid = ''
    241         current, firstkw, lastkw, topid = 0, '', '', ''
    242         while 1:
    243             line = self.getline()
    244             i = 0
    245             m = self.wsprog.match(line)
    246             if m: i = m.end()
    247             # end if

    248             m = self.endprog.match(line)
    249             if m:
    250                 thiskw = 'end'
    251                 endkw = m.group('kw')
    252                 thisid = m.group('id')
    253             else:
    254                 m = self.kwprog.match(line)
    255                 if m:
    256                     thiskw = m.group('kw')
    257                     if not next.has_key(thiskw):
    258                         thiskw = ''
    259                     # end if

    260                     if thiskw in ('def', 'class'):
    261                         thisid = m.group('id')
    262                     else:
    263                         thisid = ''
    264                     # end if

    265                 elif line[i:i+1] in ('\n', '#'):
    266                     todo.append(line)
    267                     continue
    268                 else:
    269                     thiskw = ''
    270                 # end if

    271             # end if

    272             indent = len(line[:i].expandtabs(self.tabsize))
    273             while indent < current:
    274                 if firstkw:
    275                     if topid:
    276                         s = '# end %s %s\n' % (
    277                                 firstkw, topid)
    278                     else:
    279                         s = '# end %s\n' % firstkw
    280                     # end if

    281                     self.putline(s, current)
    282                     firstkw = lastkw = ''
    283                 # end if

    284                 current, firstkw, lastkw, topid = stack[-1]
    285                 del stack[-1]
    286             # end while

    287             if indent == current and firstkw:
    288                 if thiskw == 'end':
    289                     if endkw != firstkw:
    290                         self.error('mismatched end')
    291                     # end if

    292                     firstkw = lastkw = ''
    293                 elif not thiskw or thiskw in start:
    294                     if topid:
    295                         s = '# end %s %s\n' % (
    296                                 firstkw, topid)
    297                     else:
    298                         s = '# end %s\n' % firstkw
    299                     # end if

    300                     self.putline(s, current)
    301                     firstkw = lastkw = topid = ''
    302                 # end if

    303             # end if

    304             if indent > current:
    305                 stack.append((current, firstkw, lastkw, topid))
    306                 if thiskw and thiskw not in start:
    307                     # error

    308                     thiskw = ''
    309                 # end if

    310                 current, firstkw, lastkw, topid = \
    311                          indent, thiskw, thiskw, thisid
    312             # end if

    313             if thiskw:
    314                 if thiskw in start:
    315                     firstkw = lastkw = thiskw
    316                     topid = thisid
    317                 else:
    318                     lastkw = thiskw
    319                 # end if

    320             # end if

    321             for l in todo: self.write(l)
    322             # end for

    323             todo = []
    324             if not line: break
    325             # end if

    326             self.write(line)
    327         # end while

    328     # end def complete

    329 
    330 # end class PythonIndenter

    331 
    332 # Simplified user interface

    333 # - xxx_filter(input, output): read and write file objects

    334 # - xxx_string(s): take and return string object

    335 # - xxx_file(filename): process file in place, return true iff changed

    336 
    337 def complete_filter(input = sys.stdin, output = sys.stdout,
    338                     stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    339     pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    340     pi.complete()
    341 # end def complete_filter

    342 
    343 def delete_filter(input= sys.stdin, output = sys.stdout,
    344                         stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    345     pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    346     pi.delete()
    347 # end def delete_filter

    348 
    349 def reformat_filter(input = sys.stdin, output = sys.stdout,
    350                     stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    351     pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    352     pi.reformat()
    353 # end def reformat_filter

    354 
    355 class StringReader:
    356     def __init__(self, buf):
    357         self.buf = buf
    358         self.pos = 0
    359         self.len = len(self.buf)
    360     # end def __init__

    361     def read(self, n = 0):
    362         if n <= 0:
    363             n = self.len - self.pos
    364         else:
    365             n = min(n, self.len - self.pos)
    366         # end if

    367         r = self.buf[self.pos : self.pos + n]
    368         self.pos = self.pos + n
    369         return r
    370     # end def read

    371     def readline(self):
    372         i = self.buf.find('\n', self.pos)
    373         return self.read(i + 1 - self.pos)
    374     # end def readline

    375     def readlines(self):
    376         lines = []
    377         line = self.readline()
    378         while line:
    379             lines.append(line)
    380             line = self.readline()
    381         # end while

    382         return lines
    383     # end def readlines

    384     # seek/tell etc. are left as an exercise for the reader

    385 # end class StringReader

    386 
    387 class StringWriter:
    388     def __init__(self):
    389         self.buf = ''
    390     # end def __init__

    391     def write(self, s):
    392         self.buf = self.buf + s
    393     # end def write

    394     def getvalue(self):
    395         return self.buf
    396     # end def getvalue

    397 # end class StringWriter

    398 
    399 def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    400     input = StringReader(source)
    401     output = StringWriter()
    402     pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    403     pi.complete()
    404     return output.getvalue()
    405 # end def complete_string

    406 
    407 def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    408     input = StringReader(source)
    409     output = StringWriter()
    410     pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    411     pi.delete()
    412     return output.getvalue()
    413 # end def delete_string

    414 
    415 def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    416     input = StringReader(source)
    417     output = StringWriter()
    418     pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
    419     pi.reformat()
    420     return output.getvalue()
    421 # end def reformat_string

    422 
    423 def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    424     source = open(filename, 'r').read()
    425     result = complete_string(source, stepsize, tabsize, expandtabs)
    426     if source == result: return 0
    427     # end if

    428     import os
    429     try: os.rename(filename, filename + '~')
    430     except os.error: pass
    431     # end try

    432     f = open(filename, 'w')
    433     f.write(result)
    434     f.close()
    435     return 1
    436 # end def complete_file

    437 
    438 def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    439     source = open(filename, 'r').read()
    440     result = delete_string(source, stepsize, tabsize, expandtabs)
    441     if source == result: return 0
    442     # end if

    443     import os
    444     try: os.rename(filename, filename + '~')
    445     except os.error: pass
    446     # end try

    447     f = open(filename, 'w')
    448     f.write(result)
    449     f.close()
    450     return 1
    451 # end def delete_file

    452 
    453 def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    454     source = open(filename, 'r').read()
    455     result = reformat_string(source, stepsize, tabsize, expandtabs)
    456     if source == result: return 0
    457     # end if

    458     import os
    459     try: os.rename(filename, filename + '~')
    460     except os.error: pass
    461     # end try

    462     f = open(filename, 'w')
    463     f.write(result)
    464     f.close()
    465     return 1
    466 # end def reformat_file

    467 
    468 # Test program when called as a script

    469 
    470 usage = """
    471 usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
    472 -c         : complete a correctly indented program (add #end directives)
    473 -d         : delete #end directives
    474 -r         : reformat a completed program (use #end directives)
    475 -s stepsize: indentation step (default %(STEPSIZE)d)
    476 -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
    477 -e         : expand TABs into spaces (defailt OFF)
    478 [file] ... : files are changed in place, with backups in file~
    479 If no files are specified or a single - is given,
    480 the program acts as a filter (reads stdin, writes stdout).
    481 """ % vars()
    482 
    483 def error_both(op1, op2):
    484     sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
    485     sys.stderr.write(usage)
    486     sys.exit(2)
    487 # end def error_both

    488 
    489 def test():
    490     import getopt
    491     try:
    492         opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    493     except getopt.error, msg:
    494         sys.stderr.write('Error: %s\n' % msg)
    495         sys.stderr.write(usage)
    496         sys.exit(2)
    497     # end try

    498     action = None
    499     stepsize = STEPSIZE
    500     tabsize = TABSIZE
    501     expandtabs = EXPANDTABS
    502     for o, a in opts:
    503         if o == '-c':
    504             if action: error_both(o, action)
    505             # end if

    506             action = 'complete'
    507         elif o == '-d':
    508             if action: error_both(o, action)
    509             # end if

    510             action = 'delete'
    511         elif o == '-r':
    512             if action: error_both(o, action)
    513             # end if

    514             action = 'reformat'
    515         elif o == '-s':
    516             stepsize = int(a)
    517         elif o == '-t':
    518             tabsize = int(a)
    519         elif o == '-e':
    520             expandtabs = 1
    521         # end if

    522     # end for

    523     if not action:
    524         sys.stderr.write(
    525                 'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
    526         sys.stderr.write(usage)
    527         sys.exit(2)
    528     # end if

    529     if not args or args == ['-']:
    530         action = eval(action + '_filter')
    531         action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    532     else:
    533         action = eval(action + '_file')
    534         for filename in args:
    535             action(filename, stepsize, tabsize, expandtabs)
    536         # end for

    537     # end if

    538 # end def test

    539 
    540 if __name__ == '__main__':
    541     test()
    542 # end if

    543