Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 # Run with directory arguments from any directory, with no special setup required.
      3 # Or:
      4 # for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
      5 
      6 import ftplib
      7 import hashlib
      8 import os
      9 import re
     10 import shutil
     11 import string
     12 import subprocess
     13 import sys
     14 import tarfile
     15 import tempfile
     16 
     17 def IsUninteresting(path):
     18     path = path.lower()
     19     if path.endswith(".mk") or path.endswith(".py") or path.endswith(".pyc") or path.endswith(".txt") or path.endswith(".3"):
     20         return True
     21     if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/caveats"):
     22         return True
     23     if path.endswith("/tzdata") or path.endswith("/zoneinfo/generate"):
     24         return True
     25     return False
     26 
     27 def IsAutoGenerated(content):
     28     if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
     29         return True
     30     if "This header was automatically generated from a Linux kernel header" in content:
     31         return True
     32     return False
     33 
     34 copyrights = set()
     35 
     36 def ExtractCopyrightAt(lines, i):
     37     hash = lines[i].startswith("#")
     38 
     39     # Do we need to back up to find the start of the copyright header?
     40     start = i
     41     if not hash:
     42         while start > 0:
     43             if "/*" in lines[start - 1]:
     44                 break
     45             start -= 1
     46 
     47     # Read comment lines until we hit something that terminates a
     48     # copyright header.
     49     while i < len(lines):
     50         if "*/" in lines[i]:
     51             break
     52         if hash and len(lines[i]) == 0:
     53             break
     54         if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
     55             break
     56         if "\tcitrus Id: " in lines[i]:
     57             break
     58         if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
     59             break
     60         if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
     61             break
     62         # OpenBSD likes to say where stuff originally came from:
     63         if "Original version ID:" in lines[i]:
     64             break
     65         i += 1
     66 
     67     end = i
     68 
     69     # Trim trailing cruft.
     70     while end > 0:
     71         if lines[end - 1] != " *" and lines[end - 1] != " * ====================================================":
     72             break
     73         end -= 1
     74 
     75     # Remove C/assembler comment formatting, pulling out just the text.
     76     clean_lines = []
     77     for line in lines[start:end]:
     78         line = line.replace("\t", "    ")
     79         line = line.replace("/* ", "")
     80         line = re.sub("^ \* ", "", line)
     81         line = line.replace("** ", "")
     82         line = line.replace("# ", "")
     83         if line.startswith("++Copyright++"):
     84             continue
     85         line = line.replace("--Copyright--", "")
     86         line = line.rstrip()
     87         # These come last and take care of "blank" comment lines.
     88         if line == "#" or line == " *" or line == "**" or line == "-":
     89             line = ""
     90         clean_lines.append(line)
     91 
     92     # Trim blank lines from head and tail.
     93     while clean_lines[0] == "":
     94         clean_lines = clean_lines[1:]
     95     while clean_lines[len(clean_lines) - 1] == "":
     96         clean_lines = clean_lines[0:(len(clean_lines) - 1)]
     97 
     98     copyright = "\n".join(clean_lines)
     99     copyrights.add(copyright)
    100 
    101     return i
    102 
    103 args = sys.argv[1:]
    104 if len(args) == 0:
    105     args = [ "." ]
    106 
    107 for arg in args:
    108     sys.stderr.write('Searching for source files in "%s"...\n' % arg)
    109 
    110     for directory, sub_directories, filenames in os.walk(arg):
    111         if ".git" in sub_directories:
    112             sub_directories.remove(".git")
    113         sub_directories = sorted(sub_directories)
    114 
    115         for filename in sorted(filenames):
    116             path = os.path.join(directory, filename)
    117             if IsUninteresting(path):
    118                 #print "ignoring uninteresting file %s" % path
    119                 continue
    120 
    121             try:
    122                 content = open(path, 'r').read().decode('utf-8')
    123             except:
    124                 # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
    125                 sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
    126                 content = open(path, 'r').read().decode('iso-8859-1')
    127 
    128             lines = content.split("\n")
    129 
    130             if len(lines) <= 4:
    131                 #print "ignoring short file %s" % path
    132                 continue
    133 
    134             if IsAutoGenerated(content):
    135                 #print "ignoring auto-generated file %s" % path
    136                 continue
    137 
    138             if not "Copyright" in content:
    139                 if "public domain" in content.lower():
    140                     #print "ignoring public domain file %s" % path
    141                     continue
    142                 sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
    143                 continue
    144 
    145             i = 0
    146             while i < len(lines):
    147                 if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
    148                     i = ExtractCopyrightAt(lines, i)
    149                 i += 1
    150 
    151             #print path
    152 
    153 for copyright in sorted(copyrights):
    154     print copyright.encode('utf-8')
    155     print
    156     print '-------------------------------------------------------------------'
    157     print
    158 
    159 sys.exit(0)
    160