#!/usr/bin/python

# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
'''

import optparse
import os
import re
import shutil
import string
import sys
try:
    from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
    print ("This tool requires the BeautifulSoup package "
           "(see http://www.crummy.com/software/BeautifulSoup/).\n"
           "Make sure that the file BeautifulSoup.py is either in this directory "
           "or is available in your PYTHON_PATH")
    raise


def _IdentityIndex(nodes, target):
    '''Returns the position of |target| in |nodes|, matching by identity.

    list.index() matches with ==, and BeautifulSoup tags compare equal when
    their name, attributes and contents match, so two structurally identical
    siblings would be confused with each other.  Matching with "is" always
    finds the exact node.

    Raises:
      ValueError: if |target| is not an element of |nodes| (same as list.index).
    '''
    for position, node in enumerate(nodes):
        if node is target:
            return position
    raise ValueError('node is not in list')


class HTMLFixer(object):
    '''This class cleans up the html strings as produced by Doxygen.

    The html is parsed once in the constructor; the Fix*/Remove* methods then
    modify the parsed tree in place, and str() serializes the current tree.
    '''

    def __init__(self, html):
        '''Parses |html| (a string of html markup) into self.soup.'''
        self.soup = BeautifulSoup(html)

    def FixTableHeadings(self):
        '''Fixes the doxygen table headings.

        This includes:
          - Using bare <h2> title row instead of row embedded in <tr><td> in
            table.
          - Splitting up tables into multiple separate tables if a table
            heading appears in the middle of a table.

        Copying the anchor's "name" attribute into an "id" attribute on the
        heading is currently disabled (see the NOTE in the body).

        For example, this html:
         <table>
          <tr><td colspan="2"><h2><a name="pub-attribs"></a>
          Data Fields List</h2></td></tr>
          ...
         </table>

        would be converted to this:
         <h2>Data Fields List</h2>
         <table>
          ...
         </table>
        '''

        # Pass 1: turn every heading row (<tr><td><h2><a name=...>) into a bare
        # <h2> element whose only content is whatever follows the anchor.
        table_headers = []
        for tag in self.soup.findAll('tr'):
            anchor = tag.td and tag.td.h2 and tag.td.h2.a
            # .get() rather than ['name']: an anchor without a "name" attribute
            # is simply not a heading row and must not raise KeyError.
            if anchor and anchor.get('name'):
                # NOTE: copying the anchor name into tag['id'] is currently
                # disabled.
                tag.string = anchor.next
                tag.name = 'h2'
                table_headers.append(tag)

        # reverse the list so that earlier tags don't delete later tags
        table_headers.reverse()

        # Pass 2: split up tables that have multiple heading rows, then hoist
        # each heading out of its table.
        for tag in table_headers:
            print("Header tag: %s is %s" % (tag.name, tag.string.strip()))
            # Is this a heading in the middle of a table?
            if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
                print("Splitting Table named %s" % tag.string.strip())
                table = tag.parent
                table_parent = table.parent
                table_index = _IdentityIndex(table_parent.contents, table)
                new_table = Tag(self.soup, name='table', attrs=table.attrs)
                table_parent.insert(table_index + 1, new_table)
                tag_index = _IdentityIndex(table.contents, tag)
                # Iterate over a slice (a copy): inserting a row into the new
                # table removes it from the old table's contents.
                for index, row in enumerate(table.contents[tag_index:]):
                    new_table.insert(index, row)
            # Now move the <h2> tag to be in front of the <table> tag
            assert tag.parent.name == 'table'
            table = tag.parent
            table_parent = table.parent
            table_index = _IdentityIndex(table_parent.contents, table)
            table_parent.insert(table_index, tag)

    def RemoveTopHeadings(self):
        '''Removes <div> sections with a header, tabs, or navpath class
        attribute.'''
        header_tags = self.soup.findAll(
            name='div',
            attrs={'class': re.compile('^(header|tabs[0-9]*|navpath)$')})
        for tag in header_tags:
            tag.extract()

    def FixAll(self):
        '''Applies every cleanup step to the parsed document.'''
        self.FixTableHeadings()
        self.RemoveTopHeadings()

    def __str__(self):
        '''Returns the current state of the document serialized as html.'''
        return str(self.soup)


def main():
    '''Main entry for the doxy_cleanup utility

    doxy_cleanup takes a list of html files and modifies them in place.

    With -m/--move, each file is moved, after being rewritten, into an
    "original_html" directory that is a sibling of the file's own directory.

    Returns:
      0 on success, 1 if no files were given on the command line.  Any error
      raised while processing a file is reported and then re-raised.
    '''

    parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

    parser.add_option('-m', '--move', dest='move', action='store_true',
                      default=False, help='move html files to "original_html"')

    options, files = parser.parse_args()

    if not files:
        parser.print_usage()
        return 1

    for filename in files:
        try:
            with open(filename, 'r') as html_file:
                html = html_file.read()

            print("Processing %s" % filename)
            fixer = HTMLFixer(html)
            fixer.FixAll()
            with open(filename, 'w') as html_file:
                html_file.write(str(fixer))
            if options.move:
                new_directory = os.path.join(
                    os.path.dirname(os.path.dirname(filename)), 'original_html')
                if not os.path.exists(new_directory):
                    os.mkdir(new_directory)
                shutil.move(filename, new_directory)
        except Exception:
            # Say which file failed, then let the original error propagate.
            print("Error while processing %s" % filename)
            raise

    return 0


if __name__ == '__main__':
    sys.exit(main())