Home | History | Annotate | Download | only in documentation
      1 #!/usr/bin/python
      2 
      3 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 
      7 '''This utility cleans up the html files as emitted by doxygen so
      8 that they are suitable for publication on a Google documentation site.
      9 '''
     10 
     11 import optparse
     12 import os
     13 import re
     14 import shutil
     15 import string
     16 import sys
     17 try:
     18   from BeautifulSoup import BeautifulSoup, Tag
     19 except (ImportError, NotImplementedError):
     20   print ("This tool requires the BeautifulSoup package "
     21          "(see http://www.crummy.com/software/BeautifulSoup/).\n"
     22          "Make sure that the file BeautifulSoup.py is either in this directory "
     23          "or is available in your PYTHON_PATH")
     24   raise
     25 
     26 
     27 class HTMLFixer(object):
     28   '''This class cleans up the html strings as produced by Doxygen
     29   '''
     30 
     31   def __init__(self, html):
     32     self.soup = BeautifulSoup(html)
     33 
     34   def FixTableHeadings(self):
     35     '''Fixes the doxygen table headings.
     36 
     37     This includes:
     38       - Using bare <h2> title row instead of row embedded in <tr><td> in table
     39       - Putting the "name" attribute into the "id" attribute of the <tr> tag.
     40       - Splitting up tables into multiple separate tables if a table
     41         heading appears in the middle of a table.
     42 
     43     For example, this html:
     44      <table>
     45       <tr><td colspan="2"><h2><a name="pub-attribs"></a>
     46       Data Fields List</h2></td></tr>
     47       ...
     48      </table>
     49 
     50     would be converted to this:
     51      <h2>Data Fields List</h2>
     52      <table>
     53       ...
     54      </table>
     55     '''
     56 
     57     table_headers = []
     58     for tag in self.soup.findAll('tr'):
     59       if tag.td and tag.td.h2 and tag.td.h2.a and tag.td.h2.a['name']:
     60         #tag['id'] = tag.td.h2.a['name']
     61         tag.string = tag.td.h2.a.next
     62         tag.name = 'h2'
     63         table_headers.append(tag)
     64 
     65     # reverse the list so that earlier tags don't delete later tags
     66     table_headers.reverse()
     67     # Split up tables that have multiple table header (th) rows
     68     for tag in table_headers:
     69       print "Header tag: %s is %s" % (tag.name, tag.string.strip())
     70       # Is this a heading in the middle of a table?
     71       if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
     72         print "Splitting Table named %s" % tag.string.strip()
     73         table = tag.parent
     74         table_parent = table.parent
     75         table_index = table_parent.contents.index(table)
     76         new_table = Tag(self.soup, name='table', attrs=table.attrs)
     77         table_parent.insert(table_index + 1, new_table)
     78         tag_index = table.contents.index(tag)
     79         for index, row in enumerate(table.contents[tag_index:]):
     80           new_table.insert(index, row)
     81       # Now move the <h2> tag to be in front of the <table> tag
     82       assert tag.parent.name == 'table'
     83       table = tag.parent
     84       table_parent = table.parent
     85       table_index = table_parent.contents.index(table)
     86       table_parent.insert(table_index, tag)
     87 
     88   def RemoveTopHeadings(self):
     89     '''Removes <div> sections with a header, tabs, or navpath class attribute'''
     90     header_tags = self.soup.findAll(
     91         name='div',
     92         attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
     93     [tag.extract() for tag in header_tags]
     94 
     95   def FixAll(self):
     96     self.FixTableHeadings()
     97     self.RemoveTopHeadings()
     98 
     99   def __str__(self):
    100     return str(self.soup)
    101 
    102 
    103 def main():
    104   '''Main entry for the doxy_cleanup utility
    105 
    106   doxy_cleanup takes a list of html files and modifies them in place.'''
    107 
    108   parser = optparse.OptionParser(usage='Usage: %prog [options] files...')
    109 
    110   parser.add_option('-m', '--move', dest='move', action='store_true',
    111                     default=False, help='move html files to "original_html"')
    112 
    113   options, files = parser.parse_args()
    114 
    115   if not files:
    116     parser.print_usage()
    117     return 1
    118 
    119   for filename in files:
    120     try:
    121       with open(filename, 'r') as file:
    122         html = file.read()
    123 
    124       print "Processing %s" % filename
    125       fixer = HTMLFixer(html)
    126       fixer.FixAll()
    127       with open(filename, 'w') as file:
    128         file.write(str(fixer))
    129       if options.move:
    130         new_directory = os.path.join(
    131             os.path.dirname(os.path.dirname(filename)), 'original_html')
    132         if not os.path.exists(new_directory):
    133           os.mkdir(new_directory)
    134         shutil.move(filename, new_directory)
    135     except:
    136       print "Error while processing %s" % filename
    137       raise
    138 
    139   return 0
    140 
# Run main() only when this file is executed as a script, and use its return
# value as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
    143