#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile


# Lower-cased path suffixes of files that are never scanned for copyrights.
_UNINTERESTING_SUFFIXES = (
    ".mk", ".py", ".pyc", ".txt", ".3",
    "/notice", "/readme", "/caveats",
    "/tzdata", "/zoneinfo/generate",
)

# Text fragments that identify machine-generated files.
_AUTO_GENERATED_MARKERS = (
    "generated by gensyscalls.py",
    "generated by genserv.py",
    "This header was automatically generated from a Linux kernel header",
)

# Any of these fragments on a line ends the copyright header being read.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
)

# Whole lines that are dropped when they trail a copyright header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Comment decorations stripped (in this order) from every header line.
_COMMENT_DECORATIONS = ("/* ", " * ", "** ", "# ")

# Lines that are nothing but a comment leader; they become blank lines.
_BLANK_COMMENT_LINES = ("#", " *", "**", "-")

_SEPARATOR = b'-------------------------------------------------------------------'


def IsUninteresting(path):
    """Return True if `path` names a file that should not be scanned.

    The comparison is case-insensitive and looks only at the end of the path
    (build files, scripts, text files, man pages, NOTICE/README/CAVEATS,
    and the tzdata files).
    """
    return path.lower().endswith(_UNINTERESTING_SUFFIXES)


def IsAutoGenerated(content):
    """Return True if `content` carries one of the known generator banners."""
    return any(marker in content for marker in _AUTO_GENERATED_MARKERS)


# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around `lines[i]` into `copyrights`.

    `lines` is the file content split into lines and `i` is the index of a
    line containing "Copyright". For '#'-style comments the header starts at
    line `i`; otherwise the start is moved back to the line following the
    nearest preceding line that contains "/*" (or to the top of the file).
    The header runs until a terminator fragment, or, for '#'-style comments,
    an empty line.

    Returns the index at which scanning stopped (the terminating line, or
    len(lines)), so the caller can resume after it.

    A header that turns out to be empty after clean-up (for example a
    one-line "/* Copyright ... */" comment) is reported on stderr and
    skipped instead of raising IndexError.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        line = lines[i]
        if is_hash_comment and len(line) == 0:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        # NOTE(review): the tab replacement is a single space as found in the
        # source; confirm it was not wider before whitespace was mangled.
        line = line.replace("\t", " ")
        for decoration in _COMMENT_DECORATIONS:
            line = line.replace(decoration, "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail; the bounds checks keep an
    # all-blank (or empty) header from running off the list.
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1

    if first == last:
        sys.stderr.write('warning: ignoring empty copyright header at line %d\n' % (start + 1))
        return i

    copyrights.add("\n".join(clean_lines[first:last]))
    return i


def _ReadSource(path):
    """Return the text of `path`, decoded as UTF-8 or else ISO-8859-1."""
    with open(path, 'rb') as source:
        raw = source.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
        sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
        return raw.decode('iso-8859-1')


def _ScanFile(path):
    """Collect every copyright header found in the file at `path`."""
    content = _ReadSource(path)
    lines = content.split("\n")

    if len(lines) <= 4:
        return
    if IsAutoGenerated(content):
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            return
        sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
        return

    i = 0
    while i < len(lines):
        if "Copyright" in lines[i]:
            i = ExtractCopyrightAt(lines, i)
        i += 1


def main(args):
    """Scan each directory in `args` (default ".") and print the notices.

    Progress and warnings go to stderr; the sorted, de-duplicated notices go
    to stdout as UTF-8, each followed by a separator line. Returns 0.
    """
    if not args:
        args = ["."]

    for arg in args:
        sys.stderr.write('Searching for source files in "%s"...\n' % arg)

        for directory, sub_directories, filenames in os.walk(arg):
            if ".git" in sub_directories:
                sub_directories.remove(".git")
            # Sort in place: os.walk only honours changes made to the list
            # object it handed us, not a rebound local name.
            sub_directories.sort()

            for filename in sorted(filenames):
                path = os.path.join(directory, filename)
                if IsUninteresting(path):
                    continue
                _ScanFile(path)

    # Write bytes so the output is identical under Python 2 and Python 3.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for notice in sorted(copyrights):
        out.write(notice.encode('utf-8') + b"\n\n" + _SEPARATOR + b"\n\n")

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))