#!/usr/bin/env python
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage: generate-notice-files [plain text output file] [html output file] [file title] [directory of notices]

Generate the Android notice files, including both text and html files.

-h to display this usage message and exit.
"""
from collections import defaultdict
import getopt
import hashlib
import itertools
import os
import os.path
import re
import sys

# Number of bytes read per iteration while hashing a notice file.
MD5_BLOCKSIZE = 1024 * 1024

# Characters that must be replaced by entities before text is placed in HTML.
HTML_ESCAPE_TABLE = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}

HTML_OUTPUT_CSS = """
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""


def parse_args(argv):
    """Parse the command line and return the four positional arguments.

    ARGV is the argument list without the program name.  Prints the usage
    message and exits with status 2 on a bad option or on -h, and with
    status 1 when the number of positional arguments is not exactly four.
    """
    try:
        opts, args = getopt.getopt(argv, "h")
    except getopt.GetoptError as err:
        print(str(err))
        print(__doc__)
        sys.exit(2)

    for opt, _ in opts:
        if opt == "-h":
            print(__doc__)
            sys.exit(2)
        else:
            sys.stderr.write("unhandled option %s\n" % (opt,))

    if len(args) != 4:
        print("need exactly four arguments, the two output files, the file title\n"
              "and the directory containing notices, not %d" % (len(args),))
        print(__doc__)
        sys.exit(1)
    return args


def hexify(s):
    """Return the lowercase hexadecimal representation of the byte string S."""
    # bytearray() yields integers on both Python 2 (str) and Python 3 (bytes).
    return "".join("%02x" % byte for byte in bytearray(s))


def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command"""
    digest = hashlib.md5()
    with open(filename, "rb") as f:
        # Read in fixed-size blocks so large files are never held in memory.
        for block in iter(lambda: f.read(MD5_BLOCKSIZE), b""):
            digest.update(block)
    return digest.hexdigest()


def html_escape(text):
    """Produce entities within text."""
    return "".join(HTML_ESCAPE_TABLE.get(c, c) for c in text)


def _open_text(path, mode="r"):
    """Open PATH for text I/O in a way that works on Python 2 and Python 3."""
    if sys.version_info[0] >= 3:
        # Notice files are arbitrary third-party text; never fail on odd bytes.
        return open(path, mode, encoding="utf-8", errors="replace")
    # Python 2: str is already a byte string; keep binary mode for output.
    return open(path, mode + "b" if "w" in mode else mode)


def _read_text(path):
    """Return the full contents of the text file at PATH."""
    with _open_text(path, "r") as f:
        return f.read()


def _src_dir_strip_re(input_dir):
    """Return a regex whose group 1 is a notice path relative to INPUT_DIR, minus '.txt'."""
    # INPUT_DIR is a filesystem path, not a pattern, so it must be escaped.
    return re.compile(re.escape(input_dir) + r"(/.*)\.txt")


def _sorted_groups(file_hash):
    """Return the non-empty filename groups of FILE_HASH in a deterministic order.

    Each group is sorted, and the groups themselves are sorted, so that the
    generated files do not depend on dictionary iteration order.
    """
    return sorted(sorted(group) for group in file_hash.values() if group)


def combine_notice_files_html(file_hash, input_dir, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH maps a content hash to the list of notice files with that
    content.  INPUT_DIR is stripped from the displayed file names.
    """
    strip_re = _src_dir_strip_re(input_dir)
    groups = _sorted_groups(file_hash)

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do).  Every file of a group
    # shares the id of the single row emitted for that group, so that each
    # table-of-contents link resolves.
    id_table = {}
    for group_id, group in enumerate(groups):
        for filename in group:
            id_table[filename] = group_id

    with _open_text(output_filename, "w") as output_file:

        def emit(line=""):
            output_file.write(line + "\n")

        # Header pieces
        emit("<html><head>")
        emit(HTML_OUTPUT_CSS)
        emit('</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">')

        # Table of contents, one entry per notice file
        emit('<div class="toc">')
        emit("<ul>")
        for filename in sorted(id_table):
            stripped_filename = html_escape(strip_re.sub(r"\1", filename))
            emit('<li><a href="#id%d">%s</a></li>' % (id_table[filename], stripped_filename))
        emit("</ul>")
        emit("</div><!-- table of contents -->")

        # The individual notice file lists, one table row per distinct notice
        emit('<table cellpadding="0" cellspacing="0" border="0">')
        for group in groups:
            emit('<tr id="id%d"><td class="same-license">' % id_table[group[0]])
            emit('<div class="label">Notices for file(s):</div>')
            emit('<div class="file-list">')
            for filename in group:
                emit("%s <br/>" % html_escape(strip_re.sub(r"\1", filename)))
            emit("</div><!-- file-list -->")
            emit()
            emit('<pre class="license-text">')
            # All files of a group have identical content; read the first.
            emit(html_escape(_read_text(group[0])))
            emit("</pre><!-- license-text -->")
            emit("</td></tr><!-- same-license -->")
            emit()
            emit()
            emit()

        # Finish off the file output
        emit("</table>")
        emit("</body></html>")


def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_TITLE is written as the first line of the output.
    """
    strip_re = _src_dir_strip_re(input_dir)
    with _open_text(output_filename, "w") as output_file:

        def emit(line=""):
            output_file.write(line + "\n")

        emit(file_title)
        for group in _sorted_groups(file_hash):
            emit("============================================================")
            emit("Notices for file(s):")
            for filename in group:
                emit(strip_re.sub(r"\1", filename))
            emit("------------------------------------------------------------")
            # All files of a group have identical content; read the first.
            emit(_read_text(group[0]))


def main(args):
    """Generate the text and HTML notice files.

    ARGS is [text output file, html output file, file title, notice directory].
    """
    txt_output_file = args[0]
    html_output_file = args[1]
    file_title = args[2]

    # Find all the notice files and md5 them
    input_dir = os.path.normpath(args[3])
    files_with_same_hash = defaultdict(list)
    for root, _dirs, names in os.walk(input_dir):
        for name in names:
            if name.endswith(".txt"):
                filename = os.path.join(root, name)
                files_with_same_hash[md5sum(filename)].append(filename)

    print("Combining NOTICE files into HTML")
    combine_notice_files_html(files_with_same_hash, input_dir, html_output_file)
    print("Combining NOTICE files into text")
    combine_notice_files_text(files_with_same_hash, input_dir, txt_output_file, file_title)


if __name__ == "__main__":
    main(parse_args(sys.argv[1:]))