Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2012 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 """
     17 Usage: generate-notice-files --text-output [plain text output file] \
     18                --html-output [html output file] \
     19                --xml-output [xml output file] \
     20                -t [file title] -s [directory of notices]
     21 
     22 Generate the Android notice files, including both text and html files.
     23 
     24 -h to display this usage message and exit.
     25 """
     26 from collections import defaultdict
     27 import argparse
     28 import hashlib
     29 import itertools
     30 import os
     31 import os.path
     32 import re
     33 import sys
     34 
     35 MD5_BLOCKSIZE = 1024 * 1024
     36 HTML_ESCAPE_TABLE = {
     37     "&": "&",
     38     '"': """,
     39     "'": "'",
     40     ">": ">",
     41     "<": "&lt;",
     42     }
     43 
     44 def hexify(s):
     45     return ("%02x"*len(s)) % tuple(map(ord, s))
     46 
     47 def md5sum(filename):
     48     """Calculate an MD5 of the file given by FILENAME,
     49     and return hex digest as a string.
     50     Output should be compatible with md5sum command"""
     51 
     52     f = open(filename, "rb")
     53     sum = hashlib.md5()
     54     while 1:
     55         block = f.read(MD5_BLOCKSIZE)
     56         if not block:
     57             break
     58         sum.update(block)
     59     f.close()
     60     return hexify(sum.digest())
     61 
     62 
     63 def html_escape(text):
     64     """Produce entities within text."""
     65     return "".join(HTML_ESCAPE_TABLE.get(c,c) for c in text)
     66 
     67 HTML_OUTPUT_CSS="""
     68 <style type="text/css">
     69 body { padding: 0; font-family: sans-serif; }
     70 .same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
     71 .label { font-weight: bold; }
     72 .file-list { margin-left: 1em; color: blue; }
     73 </style>
     74 """
     75 
     76 def combine_notice_files_html(file_hash, input_dir, output_filename):
     77     """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME."""
     78 
     79     SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
     80 
     81     # Set up a filename to row id table (anchors inside tables don't work in
     82     # most browsers, but href's to table row ids do)
     83     id_table = {}
     84     id_count = 0
     85     for value in file_hash:
     86         for filename in value:
     87              id_table[filename] = id_count
     88         id_count += 1
     89 
     90     # Open the output file, and output the header pieces
     91     output_file = open(output_filename, "wb")
     92 
     93     print >> output_file, "<html><head>"
     94     print >> output_file, HTML_OUTPUT_CSS
     95     print >> output_file, '</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">'
     96 
     97     # Output our table of contents
     98     print >> output_file, '<div class="toc">'
     99     print >> output_file, "<ul>"
    100 
    101     # Flatten the list of lists into a single list of filenames
    102     sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))
    103 
    104     # Print out a nice table of contents
    105     for filename in sorted_filenames:
    106         stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
    107         print >> output_file, '<li><a href="#id%d">%s</a></li>' % (id_table.get(filename), stripped_filename)
    108 
    109     print >> output_file, "</ul>"
    110     print >> output_file, "</div><!-- table of contents -->"
    111     # Output the individual notice file lists
    112     print >>output_file, '<table cellpadding="0" cellspacing="0" border="0">'
    113     for value in file_hash:
    114         print >> output_file, '<tr id="id%d"><td class="same-license">' % id_table.get(value[0])
    115         print >> output_file, '<div class="label">Notices for file(s):</div>'
    116         print >> output_file, '<div class="file-list">'
    117         for filename in value:
    118             print >> output_file, "%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename))
    119         print >> output_file, "</div><!-- file-list -->"
    120         print >> output_file
    121         print >> output_file, '<pre class="license-text">'
    122         print >> output_file, html_escape(open(value[0]).read())
    123         print >> output_file, "</pre><!-- license-text -->"
    124         print >> output_file, "</td></tr><!-- same-license -->"
    125         print >> output_file
    126         print >> output_file
    127         print >> output_file
    128 
    129     # Finish off the file output
    130     print >> output_file, "</table>"
    131     print >> output_file, "</body></html>"
    132     output_file.close()
    133 
    134 def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    135     """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME."""
    136 
    137     SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
    138     output_file = open(output_filename, "wb")
    139     print >> output_file, file_title
    140     for value in file_hash:
    141       print >> output_file, "============================================================"
    142       print >> output_file, "Notices for file(s):"
    143       for filename in value:
    144         print >> output_file, SRC_DIR_STRIP_RE.sub(r"\1", filename)
    145       print >> output_file, "------------------------------------------------------------"
    146       print >> output_file, open(value[0]).read()
    147     output_file.close()
    148 
    149 def combine_notice_files_xml(files_with_same_hash, input_dir, output_filename):
    150     """Combine notice files in FILE_HASH and output a XML version to OUTPUT_FILENAME."""
    151 
    152     SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
    153 
    154     # Set up a filename to row id table (anchors inside tables don't work in
    155     # most browsers, but href's to table row ids do)
    156     id_table = {}
    157     for file_key in files_with_same_hash.keys():
    158         for filename in files_with_same_hash[file_key]:
    159              id_table[filename] = file_key
    160 
    161     # Open the output file, and output the header pieces
    162     output_file = open(output_filename, "wb")
    163 
    164     print >> output_file, '<?xml version="1.0" encoding="utf-8"?>'
    165     print >> output_file, "<licenses>"
    166 
    167     # Flatten the list of lists into a single list of filenames
    168     sorted_filenames = sorted(id_table.keys())
    169 
    170     # Print out a nice table of contents
    171     for filename in sorted_filenames:
    172         stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
    173         print >> output_file, '<file-name contentId="%s">%s</file-name>' % (id_table.get(filename), stripped_filename)
    174 
    175     print >> output_file
    176     print >> output_file
    177 
    178     processed_file_keys = []
    179     # Output the individual notice file lists
    180     for filename in sorted_filenames:
    181         file_key = id_table.get(filename)
    182         if file_key in processed_file_keys:
    183             continue
    184         processed_file_keys.append(file_key)
    185 
    186         print >> output_file, '<file-content contentId="%s"><![CDATA[%s]]></file-content>' % (file_key, html_escape(open(filename).read()))
    187         print >> output_file
    188 
    189     # Finish off the file output
    190     print >> output_file, "</licenses>"
    191     output_file.close()
    192 
    193 def get_args():
    194     parser = argparse.ArgumentParser()
    195     parser.add_argument(
    196         '--text-output', required=True,
    197         help='The text output file path.')
    198     parser.add_argument(
    199         '--html-output',
    200         help='The html output file path.')
    201     parser.add_argument(
    202         '--xml-output',
    203         help='The xml output file path.')
    204     parser.add_argument(
    205         '-t', '--title', required=True,
    206         help='The file title.')
    207     parser.add_argument(
    208         '-s', '--source-dir', required=True,
    209         help='The directory containing notices.')
    210     parser.add_argument(
    211         '-i', '--included-subdirs', action='append',
    212         help='The sub directories which should be included.')
    213     parser.add_argument(
    214         '-e', '--excluded-subdirs', action='append',
    215         help='The sub directories which should be excluded.')
    216     return parser.parse_args()
    217 
    218 def main(argv):
    219     args = get_args()
    220 
    221     txt_output_file = args.text_output
    222     html_output_file = args.html_output
    223     xml_output_file = args.xml_output
    224     file_title = args.title
    225     included_subdirs = []
    226     excluded_subdirs = []
    227     if args.included_subdirs is not None:
    228         included_subdirs = args.included_subdirs
    229     if args.excluded_subdirs is not None:
    230         excluded_subdirs = args.excluded_subdirs
    231 
    232     # Find all the notice files and md5 them
    233     input_dir = os.path.normpath(args.source_dir)
    234     files_with_same_hash = defaultdict(list)
    235     for root, dir, files in os.walk(input_dir):
    236         for file in files:
    237             matched = True
    238             if len(included_subdirs) > 0:
    239                 matched = False
    240                 for subdir in included_subdirs:
    241                     if root.startswith(input_dir + '/' + subdir):
    242                         matched = True
    243                         break
    244             elif len(excluded_subdirs) > 0:
    245                 for subdir in excluded_subdirs:
    246                     if root.startswith(input_dir + '/' + subdir):
    247                         matched = False
    248                         break
    249             if matched and file.endswith(".txt"):
    250                 filename = os.path.join(root, file)
    251                 file_md5sum = md5sum(filename)
    252                 files_with_same_hash[file_md5sum].append(filename)
    253 
    254     filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash.keys())]
    255 
    256     print "Combining NOTICE files into text"
    257     combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)
    258 
    259     if html_output_file is not None:
    260         print "Combining NOTICE files into HTML"
    261         combine_notice_files_html(filesets, input_dir, html_output_file)
    262 
    263     if xml_output_file is not None:
    264         print "Combining NOTICE files into XML"
    265         combine_notice_files_xml(files_with_same_hash, input_dir, xml_output_file)
    266 
    267 if __name__ == "__main__":
    268     main(sys.argv)
    269