#!/usr/bin/env python
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage: generate-notice-files --text-output [plain text output file] \
               --html-output [html output file] \
               --xml-output [xml output file] \
               -t [file title] -s [directory of notices]

Generate the Android notice files, including both text and html files.

-h to display this usage message and exit.
"""
from collections import defaultdict
import argparse
import hashlib
import itertools
import os
import os.path
import re
import sys

MD5_BLOCKSIZE = 1024 * 1024

# Characters that must be replaced by entities before text is embedded in
# HTML/XML output.
HTML_ESCAPE_TABLE = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}

_TEXT_RULE = "============================================================"
_TEXT_SUBRULE = "------------------------------------------------------------"


def hexify(s):
    """Return the lowercase hex representation of the byte string S.

    Accepts byte strings (the native string type on Python 2, ``bytes`` on
    Python 3) as well as text strings, whose characters are converted through
    ``ord``.
    """
    if isinstance(s, (bytes, bytearray)):
        # bytearray yields integers on both Python 2 and Python 3.
        codes = bytearray(s)
    else:
        codes = [ord(c) for c in s]
    return "".join("%02x" % code for code in codes)


def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command"""
    digest = hashlib.md5()
    # The context manager closes the file even if a read fails.
    with open(filename, "rb") as stream:
        while True:
            block = stream.read(MD5_BLOCKSIZE)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()


def html_escape(text):
    """Produce entities within text."""
    return "".join(HTML_ESCAPE_TABLE.get(c, c) for c in text)


HTML_OUTPUT_CSS="""
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""


def _read_notice(filename):
    """Return the content of FILENAME as a native string.

    The file is read as bytes.  On Python 2 the bytes already are the native
    string type; on Python 3 they are decoded as UTF-8 with the
    "surrogateescape" handler so that _write_line() can write back exactly
    the bytes that were read, even when they are not valid UTF-8.
    """
    with open(filename, "rb") as stream:
        data = stream.read()
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "surrogateescape")


def _write_line(output_file, text=""):
    """Write TEXT followed by a newline to the binary file OUTPUT_FILE."""
    if not isinstance(text, bytes):
        # Python 3 text; the inverse of the decoding done by _read_notice().
        text = text.encode("utf-8", "surrogateescape")
    output_file.write(text + b"\n")


def _source_dir_stripper(input_dir):
    """Return a function mapping INPUT_DIR/some/path.txt to /some/path.

    INPUT_DIR is escaped so that regex metacharacters in the directory name
    are matched literally, and only a real trailing ".txt" is removed.
    """
    pattern = re.compile(re.escape(input_dir) + r"(/.*)\.txt$")

    def strip(filename):
        return pattern.sub(r"\1", filename)

    return strip


def combine_notice_files_html(file_hash, input_dir, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH is a list of lists of file names; every inner list holds the
    notice files sharing one license text, and the text is read from the
    first file of each list.  INPUT_DIR is stripped from the displayed names.
    """
    strip = _source_dir_stripper(input_dir)

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do)
    id_table = {}
    for row_id, fileset in enumerate(file_hash):
        for filename in fileset:
            id_table[filename] = row_id

    # Flatten the list of lists into a single list of filenames
    sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))

    with open(output_filename, "wb") as output_file:
        # Output the header pieces
        _write_line(output_file, "<html><head>")
        _write_line(output_file, HTML_OUTPUT_CSS)
        _write_line(output_file, '</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">')

        # Output our table of contents
        _write_line(output_file, '<div class="toc">')
        _write_line(output_file, "<ul>")
        for filename in sorted_filenames:
            # File names are escaped so that "&" or "<" cannot break the markup.
            _write_line(output_file, '<li><a href="#id%d">%s</a></li>' % (
                id_table[filename], html_escape(strip(filename))))
        _write_line(output_file, "</ul>")
        _write_line(output_file, "</div><!-- table of contents -->")

        # Output the individual notice file lists
        _write_line(output_file, '<table cellpadding="0" cellspacing="0" border="0">')
        for fileset in file_hash:
            _write_line(output_file, '<tr id="id%d"><td class="same-license">' % id_table[fileset[0]])
            _write_line(output_file, '<div class="label">Notices for file(s):</div>')
            _write_line(output_file, '<div class="file-list">')
            for filename in fileset:
                _write_line(output_file, "%s <br/>" % html_escape(strip(filename)))
            _write_line(output_file, "</div><!-- file-list -->")
            _write_line(output_file)
            _write_line(output_file, '<pre class="license-text">')
            _write_line(output_file, html_escape(_read_notice(fileset[0])))
            _write_line(output_file, "</pre><!-- license-text -->")
            _write_line(output_file, "</td></tr><!-- same-license -->")
            _write_line(output_file)
            _write_line(output_file)
            _write_line(output_file)

        # Finish off the file output
        _write_line(output_file, "</table>")
        _write_line(output_file, "</body></html>")


def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_TITLE is written as the first line.  Each list of file names in
    FILE_HASH then produces one section: the names (with INPUT_DIR and the
    ".txt" suffix stripped) followed by the content of the first file.
    """
    strip = _source_dir_stripper(input_dir)
    with open(output_filename, "wb") as output_file:
        _write_line(output_file, file_title)
        for fileset in file_hash:
            _write_line(output_file, _TEXT_RULE)
            _write_line(output_file, "Notices for file(s):")
            for filename in fileset:
                _write_line(output_file, strip(filename))
            _write_line(output_file, _TEXT_SUBRULE)
            _write_line(output_file, _read_notice(fileset[0]))


def combine_notice_files_xml(files_with_same_hash, input_dir, output_filename):
    """Combine notice files in FILES_WITH_SAME_HASH and output a XML version to OUTPUT_FILENAME.

    FILES_WITH_SAME_HASH maps a content key (main() uses the MD5 hex digest)
    to the list of notice files having that content.  The key is emitted as
    the contentId linking each <file-name> to its <file-content>.
    """
    strip = _source_dir_stripper(input_dir)

    # Map every file name to the key of its content.
    id_table = {}
    for file_key, filenames in files_with_same_hash.items():
        for filename in filenames:
            id_table[filename] = file_key

    sorted_filenames = sorted(id_table)

    with open(output_filename, "wb") as output_file:
        # Output the header pieces
        _write_line(output_file, '<?xml version="1.0" encoding="utf-8"?>')
        _write_line(output_file, "<licenses>")

        # One <file-name> element per notice file.  Names are escaped so the
        # document stays well-formed when a name contains "&" or "<".
        for filename in sorted_filenames:
            _write_line(output_file, '<file-name contentId="%s">%s</file-name>' % (
                id_table[filename], html_escape(strip(filename))))

        _write_line(output_file)
        _write_line(output_file)

        # One <file-content> element per distinct content key, taken from the
        # first file (in sorted order) that carries the key.
        processed_file_keys = set()
        for filename in sorted_filenames:
            file_key = id_table[filename]
            if file_key in processed_file_keys:
                continue
            processed_file_keys.add(file_key)

            _write_line(output_file, '<file-content contentId="%s"><![CDATA[%s]]></file-content>' % (
                file_key, html_escape(_read_notice(filename))))
            _write_line(output_file)

        # Finish off the file output
        _write_line(output_file, "</licenses>")


def get_args(argv=None):
    """Parse the command line and return the argparse namespace.

    ARGV is the list of arguments without the program name; when it is None
    argparse falls back to sys.argv[1:].
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text-output', required=True,
        help='The text output file path.')
    parser.add_argument(
        '--html-output',
        help='The html output file path.')
    parser.add_argument(
        '--xml-output',
        help='The xml output file path.')
    parser.add_argument(
        '-t', '--title', required=True,
        help='The file title.')
    parser.add_argument(
        '-s', '--source-dir', required=True,
        help='The directory containing notices.')
    parser.add_argument(
        '-i', '--included-subdirs', action='append',
        help='The sub directories which should be included.')
    parser.add_argument(
        '-e', '--excluded-subdirs', action='append',
        help='The sub directories which should be excluded.')
    return parser.parse_args(argv)


def _subdir_selected(root, input_dir, included_subdirs, excluded_subdirs):
    """Return True when the notices directly inside ROOT should be used.

    When INCLUDED_SUBDIRS is non-empty only directories below one of them are
    selected and EXCLUDED_SUBDIRS is not consulted; otherwise everything that
    is not below an excluded sub directory is selected.  Matching is a plain
    string prefix test on INPUT_DIR + "/" + subdir.
    """
    def below_any(subdirs):
        return any(root.startswith(input_dir + '/' + subdir) for subdir in subdirs)

    if included_subdirs:
        return below_any(included_subdirs)
    return not below_any(excluded_subdirs)


def main(argv):
    """Generate the notice files requested by the command line ARGV.

    ARGV follows the sys.argv convention (program name first).  An empty ARGV
    makes argparse read sys.argv instead.
    """
    args = get_args(argv[1:] if argv else None)

    txt_output_file = args.text_output
    html_output_file = args.html_output
    xml_output_file = args.xml_output
    file_title = args.title
    included_subdirs = args.included_subdirs or []
    excluded_subdirs = args.excluded_subdirs or []

    # Find all the notice files and md5 them
    input_dir = os.path.normpath(args.source_dir)
    files_with_same_hash = defaultdict(list)
    for root, _subdirs, names in os.walk(input_dir):
        # The selection depends on the directory only, so test it once per
        # directory rather than once per file.
        if not _subdir_selected(root, input_dir, included_subdirs, excluded_subdirs):
            continue
        for name in names:
            if name.endswith(".txt"):
                filename = os.path.join(root, name)
                files_with_same_hash[md5sum(filename)].append(filename)

    # Sort by digest, then by file name, so the output is deterministic.
    filesets = [sorted(files_with_same_hash[key]) for key in sorted(files_with_same_hash)]

    print("Combining NOTICE files into text")
    combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)

    if html_output_file is not None:
        print("Combining NOTICE files into HTML")
        combine_notice_files_html(filesets, input_dir, html_output_file)

    if xml_output_file is not None:
        print("Combining NOTICE files into XML")
        combine_notice_files_xml(files_with_same_hash, input_dir, xml_output_file)


if __name__ == "__main__":
    main(sys.argv)