Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python3
      2 '''Add syntax highlighting to Python source code'''
      3 
      4 __author__ = 'Raymond Hettinger'
      5 
      6 import builtins
      7 import functools
      8 import html as html_module
      9 import keyword
     10 import re
     11 import tokenize
     12 
     13 #### Analyze Python Source #################################
     14 
     15 def is_builtin(s):
     16     'Return True if s is the name of a builtin'
     17     return hasattr(builtins, s)
     18 
     19 def combine_range(lines, start, end):
     20     'Join content from a range of lines between start and end'
     21     (srow, scol), (erow, ecol) = start, end
     22     if srow == erow:
     23         return lines[srow-1][scol:ecol], end
     24     rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
     25     return ''.join(rows), end
     26 
     27 def analyze_python(source):
     28     '''Generate and classify chunks of Python for syntax highlighting.
     29        Yields tuples in the form: (category, categorized_text).
     30     '''
     31     lines = source.splitlines(True)
     32     lines.append('')
     33     readline = functools.partial(next, iter(lines), '')
     34     kind = tok_str = ''
     35     tok_type = tokenize.COMMENT
     36     written = (1, 0)
     37     for tok in tokenize.generate_tokens(readline):
     38         prev_tok_type, prev_tok_str = tok_type, tok_str
     39         tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
     40         kind = ''
     41         if tok_type == tokenize.COMMENT:
     42             kind = 'comment'
     43         elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
     44             kind = 'operator'
     45         elif tok_type == tokenize.STRING:
     46             kind = 'string'
     47             if prev_tok_type == tokenize.INDENT or scol==0:
     48                 kind = 'docstring'
     49         elif tok_type == tokenize.NAME:
     50             if tok_str in ('def', 'class', 'import', 'from'):
     51                 kind = 'definition'
     52             elif prev_tok_str in ('def', 'class'):
     53                 kind = 'defname'
     54             elif keyword.iskeyword(tok_str):
     55                 kind = 'keyword'
     56             elif is_builtin(tok_str) and prev_tok_str != '.':
     57                 kind = 'builtin'
     58         if kind:
     59             text, written = combine_range(lines, written, (srow, scol))
     60             yield '', text
     61             text, written = tok_str, (erow, ecol)
     62             yield kind, text
     63     line_upto_token, written = combine_range(lines, written, (erow, ecol))
     64     yield '', line_upto_token
     65 
     66 #### Raw Output  ###########################################
     67 
     68 def raw_highlight(classified_text):
     69     'Straight text display of text classifications'
     70     result = []
     71     for kind, text in classified_text:
     72         result.append('%15s:  %r\n' % (kind or 'plain', text))
     73     return ''.join(result)
     74 
     75 #### ANSI Output ###########################################
     76 
     77 default_ansi = {
     78     'comment': ('\033[0;31m', '\033[0m'),
     79     'string': ('\033[0;32m', '\033[0m'),
     80     'docstring': ('\033[0;32m', '\033[0m'),
     81     'keyword': ('\033[0;33m', '\033[0m'),
     82     'builtin': ('\033[0;35m', '\033[0m'),
     83     'definition': ('\033[0;33m', '\033[0m'),
     84     'defname': ('\033[0;34m', '\033[0m'),
     85     'operator': ('\033[0;33m', '\033[0m'),
     86 }
     87 
     88 def ansi_highlight(classified_text, colors=default_ansi):
     89     'Add syntax highlighting to source code using ANSI escape sequences'
     90     # http://en.wikipedia.org/wiki/ANSI_escape_code
     91     result = []
     92     for kind, text in classified_text:
     93         opener, closer = colors.get(kind, ('', ''))
     94         result += [opener, text, closer]
     95     return ''.join(result)
     96 
     97 #### HTML Output ###########################################
     98 
     99 def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    100     'Convert classified text to an HTML fragment'
    101     result = [opener]
    102     for kind, text in classified_text:
    103         if kind:
    104             result.append('<span class="%s">' % kind)
    105         result.append(html_module.escape(text))
    106         if kind:
    107             result.append('</span>')
    108     result.append(closer)
    109     return ''.join(result)
    110 
    111 default_css = {
    112     '.comment': '{color: crimson;}',
    113     '.string':  '{color: forestgreen;}',
    114     '.docstring': '{color: forestgreen; font-style:italic;}',
    115     '.keyword': '{color: darkorange;}',
    116     '.builtin': '{color: purple;}',
    117     '.definition': '{color: darkorange; font-weight:bold;}',
    118     '.defname': '{color: blue;}',
    119     '.operator': '{color: brown;}',
    120 }
    121 
    122 default_html = '''\
    123 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
    124           "http://www.w3.org/TR/html4/strict.dtd">
    125 <html>
    126 <head>
    127 <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
    128 <title> {title} </title>
    129 <style type="text/css">
    130 {css}
    131 </style>
    132 </head>
    133 <body>
    134 {body}
    135 </body>
    136 </html>
    137 '''
    138 
    139 def build_html_page(classified_text, title='python',
    140                     css=default_css, html=default_html):
    141     'Create a complete HTML page with colorized source code'
    142     css_str = '\n'.join(['%s %s' % item for item in css.items()])
    143     result = html_highlight(classified_text)
    144     title = html_module.escape(title)
    145     return html.format(title=title, css=css_str, body=result)
    146 
    147 #### LaTeX Output ##########################################
    148 
    149 default_latex_commands = {
    150     'comment': r'{\color{red}#1}',
    151     'string': r'{\color{ForestGreen}#1}',
    152     'docstring': r'{\emph{\color{ForestGreen}#1}}',
    153     'keyword': r'{\color{orange}#1}',
    154     'builtin': r'{\color{purple}#1}',
    155     'definition': r'{\color{orange}#1}',
    156     'defname': r'{\color{blue}#1}',
    157     'operator': r'{\color{brown}#1}',
    158 }
    159 
    160 default_latex_document = r'''
    161 \documentclass{article}
    162 \usepackage{alltt}
    163 \usepackage{upquote}
    164 \usepackage{color}
    165 \usepackage[usenames,dvipsnames]{xcolor}
    166 \usepackage[cm]{fullpage}
    167 %(macros)s
    168 \begin{document}
    169 \center{\LARGE{%(title)s}}
    170 \begin{alltt}
    171 %(body)s
    172 \end{alltt}
    173 \end{document}
    174 '''
    175 
    176 def alltt_escape(s):
    177     'Replace backslash and braces with their escaped equivalents'
    178     xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'}
    179     return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s)
    180 
    181 def latex_highlight(classified_text, title = 'python',
    182                     commands = default_latex_commands,
    183                     document = default_latex_document):
    184     'Create a complete LaTeX document with colorized source code'
    185     macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    186     result = []
    187     for kind, text in classified_text:
    188         if kind:
    189             result.append(r'\py%s{' % kind)
    190         result.append(alltt_escape(text))
    191         if kind:
    192             result.append('}')
    193     return default_latex_document % dict(title=title, macros=macros, body=''.join(result))
    194 
    195 
    196 if __name__ == '__main__':
    197     import argparse
    198     import os.path
    199     import sys
    200     import textwrap
    201     import webbrowser
    202 
    203     parser = argparse.ArgumentParser(
    204             description = 'Add syntax highlighting to Python source code',
    205             formatter_class=argparse.RawDescriptionHelpFormatter,
    206             epilog = textwrap.dedent('''
    207                 examples:
    208 
    209                   # Show syntax highlighted code in the terminal window
    210                   $ ./highlight.py myfile.py
    211 
    212                   # Colorize myfile.py and display in a browser
    213                   $ ./highlight.py -b myfile.py
    214 
    215                   # Create an HTML section to embed in an existing webpage
    216                   ./highlight.py -s myfile.py
    217 
    218                   # Create a complete HTML file
    219                   $ ./highlight.py -c myfile.py > myfile.html
    220 
    221                   # Create a PDF using LaTeX
    222                   $ ./highlight.py -l myfile.py | pdflatex
    223 
    224             '''))
    225     parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
    226             help = 'file containing Python sourcecode')
    227     parser.add_argument('-b', '--browser', action = 'store_true',
    228             help = 'launch a browser to show results')
    229     parser.add_argument('-c', '--complete', action = 'store_true',
    230             help = 'build a complete html webpage')
    231     parser.add_argument('-l', '--latex', action = 'store_true',
    232             help = 'build a LaTeX document')
    233     parser.add_argument('-r', '--raw', action = 'store_true',
    234             help = 'raw parse of categorized text')
    235     parser.add_argument('-s', '--section', action = 'store_true',
    236             help = 'show an HTML section rather than a complete webpage')
    237     args = parser.parse_args()
    238 
    239     if args.section and (args.browser or args.complete):
    240         parser.error('The -s/--section option is incompatible with '
    241                      'the -b/--browser or -c/--complete options')
    242 
    243     sourcefile = args.sourcefile
    244     with open(sourcefile) as f:
    245         source = f.read()
    246     classified_text = analyze_python(source)
    247 
    248     if args.raw:
    249         encoded = raw_highlight(classified_text)
    250     elif args.complete or args.browser:
    251         encoded = build_html_page(classified_text, title=sourcefile)
    252     elif args.section:
    253         encoded = html_highlight(classified_text)
    254     elif args.latex:
    255         encoded = latex_highlight(classified_text, title=sourcefile)
    256     else:
    257         encoded = ansi_highlight(classified_text)
    258 
    259     if args.browser:
    260         htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
    261         with open(htmlfile, 'w') as f:
    262             f.write(encoded)
    263         webbrowser.open('file://' + os.path.abspath(htmlfile))
    264     else:
    265         sys.stdout.write(encoded)
    266