Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 # Run with directory arguments from any directory, with no special setup required.
      3 # Or:
      4 # for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
      5 
      6 import ftplib
      7 import hashlib
      8 import os
      9 import re
     10 import shutil
     11 import string
     12 import subprocess
     13 import sys
     14 import tarfile
     15 import tempfile
     16 
# When True, warn_verbose() forwards its messages to warn(); when False
# those messages are dropped.
VERBOSE = False
     18 
     19 def warn(s):
     20     sys.stderr.write("warning: %s\n" % s)
     21 
     22 def warn_verbose(s):
     23     if VERBOSE:
     24         warn(s)
     25 
     26 def is_interesting(path):
     27     path = path.lower()
     28     uninteresting_extensions = [
     29         ".bp",
     30         ".map",
     31         ".mk",
     32         ".py",
     33         ".pyc",
     34         ".swp",
     35         ".txt",
     36     ]
     37     if os.path.splitext(path)[1] in uninteresting_extensions:
     38         return False
     39     if path.endswith("/notice") or path.endswith("/readme"):
     40         return False
     41     return True
     42 
     43 def is_auto_generated(content):
     44     if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
     45         return True
     46     if "This header was automatically generated from a Linux kernel header" in content:
     47         return True
     48     return False
     49 
     50 copyrights = set()
     51 
     52 def extract_copyright_at(lines, i):
     53     hash = lines[i].startswith("#")
     54 
     55     # Do we need to back up to find the start of the copyright header?
     56     start = i
     57     if not hash:
     58         while start > 0:
     59             if "/*" in lines[start - 1]:
     60                 break
     61             start -= 1
     62 
     63     # Read comment lines until we hit something that terminates a
     64     # copyright header.
     65     while i < len(lines):
     66         if "*/" in lines[i]:
     67             break
     68         if hash and len(lines[i]) == 0:
     69             break
     70         if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
     71             break
     72         if "\tcitrus Id: " in lines[i]:
     73             break
     74         if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
     75             break
     76         if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
     77             break
     78         # OpenBSD likes to say where stuff originally came from:
     79         if "Original version ID:" in lines[i]:
     80             break
     81         i += 1
     82 
     83     end = i
     84 
     85     # Trim trailing cruft.
     86     while end > 0:
     87         if lines[end - 1] != " *" and lines[end - 1] != " * ====================================================":
     88             break
     89         end -= 1
     90 
     91     # Remove C/assembler comment formatting, pulling out just the text.
     92     clean_lines = []
     93     for line in lines[start:end]:
     94         line = line.replace("\t", "    ")
     95         line = line.replace("/* ", "")
     96         line = re.sub("^ \* ", "", line)
     97         line = line.replace("** ", "")
     98         line = line.replace("# ", "")
     99         if line.startswith("++Copyright++"):
    100             continue
    101         line = line.replace("--Copyright--", "")
    102         line = line.rstrip()
    103         # These come last and take care of "blank" comment lines.
    104         if line == "#" or line == " *" or line == "**" or line == "-":
    105             line = ""
    106         clean_lines.append(line)
    107 
    108     # Trim blank lines from head and tail.
    109     while clean_lines[0] == "":
    110         clean_lines = clean_lines[1:]
    111     while clean_lines[len(clean_lines) - 1] == "":
    112         clean_lines = clean_lines[0:(len(clean_lines) - 1)]
    113 
    114     copyright = "\n".join(clean_lines)
    115     copyrights.add(copyright)
    116 
    117     return i
    118 
    119 
    120 def do_file(path):
    121     with open(path, "r") as the_file:
    122         try:
    123             content = open(path, "r").read().decode("utf-8")
    124         except UnicodeDecodeError:
    125             warn("bad UTF-8 in %s" % path)
    126             content = open(path, "r").read().decode("iso-8859-1")
    127 
    128     lines = content.split("\n")
    129 
    130     if len(lines) <= 4:
    131         warn_verbose("ignoring short file %s" % path)
    132         return
    133 
    134     if is_auto_generated(content):
    135         warn_verbose("ignoring auto-generated file %s" % path)
    136         return
    137 
    138     if not "Copyright" in content:
    139         if "public domain" in content.lower():
    140             warn("ignoring public domain file %s" % path)
    141             return
    142         warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
    143         return
    144 
    145     # Manually iterate because extract_copyright_at tells us how many lines to skip.
    146     i = 0
    147     while i < len(lines):
    148         if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
    149             i = extract_copyright_at(lines, i)
    150         else:
    151             i += 1
    152 
    153 
    154 def do_dir(path):
    155     for directory, sub_directories, filenames in os.walk(arg):
    156         if ".git" in sub_directories:
    157             sub_directories.remove(".git")
    158         sub_directories = sorted(sub_directories)
    159 
    160         for filename in sorted(filenames):
    161             path = os.path.join(directory, filename)
    162             if is_interesting(path):
    163                 do_file(path)
    164 
    165 
    166 args = sys.argv[1:]
    167 if len(args) == 0:
    168     args = [ "." ]
    169 
    170 for arg in args:
    171     if os.path.isdir(arg):
    172         do_dir(arg)
    173     else:
    174         do_file(arg)
    175 
    176 for copyright in sorted(copyrights):
    177     print copyright.encode("utf-8")
    178     print
    179     print "-------------------------------------------------------------------"
    180     print
    181 
    182 sys.exit(0)
    183