Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2012 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 """
     17 Usage: generate-notice-files [plain text output file] [html output file] [file title] [directory of notices]
     18 
     19 Generate the Android notice files, including both text and html files.
     20 
     21 -h to display this usage message and exit.
     22 """
     23 from collections import defaultdict
     24 import getopt
     25 import hashlib
     26 import itertools
     27 import os
     28 import os.path
     29 import re
     30 import sys
     31 
     32 MD5_BLOCKSIZE = 1024 * 1024
     33 HTML_ESCAPE_TABLE = {
     34     "&": "&",
     35     '"': """,
     36     "'": "'",
     37     ">": ">",
     38     "<": "&lt;",
     39     }
     40 
     41 try:
     42   opts, args = getopt.getopt(sys.argv[1:], "h")
     43 except getopt.GetoptError, err:
     44     print str(err)
     45     print __doc__
     46     sys.exit(2)
     47 
     48 for o, a in opts:
     49   if o == "-h":
     50     print __doc__
     51     sys.exit(2)
     52   else:
     53     print >> sys.stderr, "unhandled option %s" % (o,)
     54 
     55 if len(args) != 4:
     56     print """need exactly four arguments, the two output files, the file title
     57              and the directory containing notices, not %d""" % (len(args),)
     58     print __doc__
     59     sys.exit(1)
     60 
     61 def hexify(s):
     62     return ("%02x"*len(s)) % tuple(map(ord, s))
     63 
     64 def md5sum(filename):
     65     """Calculate an MD5 of the file given by FILENAME,
     66     and return hex digest as a string.
     67     Output should be compatible with md5sum command"""
     68 
     69     f = open(filename, "rb")
     70     sum = hashlib.md5()
     71     while 1:
     72         block = f.read(MD5_BLOCKSIZE)
     73         if not block:
     74             break
     75         sum.update(block)
     76     f.close()
     77     return hexify(sum.digest())
     78 
     79 
     80 def html_escape(text):
     81     """Produce entities within text."""
     82     return "".join(HTML_ESCAPE_TABLE.get(c,c) for c in text)
     83 
     84 HTML_OUTPUT_CSS="""
     85 <style type="text/css">
     86 body { padding: 0; font-family: sans-serif; }
     87 .same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
     88 .label { font-weight: bold; }
     89 .file-list { margin-left: 1em; color: blue; }
     90 </style>
     91 """
     92 
     93 def combine_notice_files_html(file_hash, input_dir, output_filename):
     94     """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME."""
     95 
     96     SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
     97 
     98     # Set up a filename to row id table (anchors inside tables don't work in
     99     # most browsers, but href's to table row ids do)
    100     id_table = {}
    101     id_count = 0
    102     for value in file_hash:
    103         for filename in value:
    104              id_table[filename] = id_count
    105         id_count += 1
    106 
    107     # Open the output file, and output the header pieces
    108     output_file = open(output_filename, "wb")
    109 
    110     print >> output_file, "<html><head>"
    111     print >> output_file, HTML_OUTPUT_CSS
    112     print >> output_file, '</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">'
    113 
    114     # Output our table of contents
    115     print >> output_file, '<div class="toc">'
    116     print >> output_file, "<ul>"
    117 
    118     # Flatten the list of lists into a single list of filenames
    119     sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))
    120 
    121     # Print out a nice table of contents
    122     for filename in sorted_filenames:
    123         stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
    124         print >> output_file, '<li><a href="#id%d">%s</a></li>' % (id_table.get(filename), stripped_filename)
    125 
    126     print >> output_file, "</ul>"
    127     print >> output_file, "</div><!-- table of contents -->"
    128     # Output the individual notice file lists
    129     print >>output_file, '<table cellpadding="0" cellspacing="0" border="0">'
    130     for value in file_hash:
    131         print >> output_file, '<tr id="id%d"><td class="same-license">' % id_table.get(value[0])
    132         print >> output_file, '<div class="label">Notices for file(s):</div>'
    133         print >> output_file, '<div class="file-list">'
    134         for filename in value:
    135             print >> output_file, "%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename))
    136         print >> output_file, "</div><!-- file-list -->"
    137         print >> output_file
    138         print >> output_file, '<pre class="license-text">'
    139         print >> output_file, html_escape(open(value[0]).read())
    140         print >> output_file, "</pre><!-- license-text -->"
    141         print >> output_file, "</td></tr><!-- same-license -->"
    142         print >> output_file
    143         print >> output_file
    144         print >> output_file
    145 
    146     # Finish off the file output
    147     print >> output_file, "</table>"
    148     print >> output_file, "</body></html>"
    149     output_file.close()
    150 
    151 def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    152     """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME."""
    153 
    154     SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
    155     output_file = open(output_filename, "wb")
    156     print >> output_file, file_title
    157     for value in file_hash:
    158       print >> output_file, "============================================================"
    159       print >> output_file, "Notices for file(s):"
    160       for filename in value:
    161         print >> output_file, SRC_DIR_STRIP_RE.sub(r"\1", filename)
    162       print >> output_file, "------------------------------------------------------------"
    163       print >> output_file, open(value[0]).read()
    164     output_file.close()
    165 
    166 def main(args):
    167     txt_output_file = args[0]
    168     html_output_file = args[1]
    169     file_title = args[2]
    170 
    171     # Find all the notice files and md5 them
    172     input_dir = os.path.normpath(args[3])
    173     files_with_same_hash = defaultdict(list)
    174     for root, dir, files in os.walk(input_dir):
    175         for file in files:
    176             if file.endswith(".txt"):
    177                 filename = os.path.join(root, file)
    178                 file_md5sum = md5sum(filename)
    179                 files_with_same_hash[file_md5sum].append(filename)
    180 
    181     filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash.keys())]
    182 
    183     print "Combining NOTICE files into HTML"
    184     combine_notice_files_html(filesets, input_dir, html_output_file)
    185     print "Combining NOTICE files into text"
    186     combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)
    187 
    188 if __name__ == "__main__":
    189     main(args)
    190