#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
#
# This script runs unchanged under both Python 2.7 and Python 3.

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

VERBOSE = False

# Line printed between consecutive notices in the generated output.
NOTICE_SEPARATOR = "-------------------------------------------------------------------"

# File extensions (lower-case) that never carry a notice we want to collect.
_UNINTERESTING_EXTENSIONS = frozenset([
    ".bp",
    ".map",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
])

# Substrings that identify machine-generated files.
_AUTO_GENERATED_MARKERS = (
    "Generated by gensyscalls.py",
    "generated by genserv.py",
    "This header was automatically generated from a Linux kernel header",
)

# Substrings that terminate a copyright header when found in a line.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Decoration-only lines that are dropped from the tail of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Lines that are "blank" once the comment formatting has been removed.
_BLANK_COMMENT_LINES = ("#", " *", "**", "-")

# Leading " * " of a C block-comment continuation line.
_LEADING_STAR_RE = re.compile(r"^ \* ")


def warn(s):
    """Write the warning message `s` to stderr."""
    sys.stderr.write("warning: %s\n" % s)


def warn_verbose(s):
    """Write the warning message `s` to stderr, but only when VERBOSE is set."""
    if VERBOSE:
        warn(s)


def is_interesting(path):
    """Return False for files that are skipped by name, True otherwise.

    The check is case-insensitive. Files with one of the uninteresting
    extensions, and files named "notice" or "readme", are skipped.
    """
    path = path.lower()
    if os.path.splitext(path)[1] in _UNINTERESTING_EXTENSIONS:
        return False
    if path.endswith(("/notice", "/readme")):
        return False
    return True


def is_auto_generated(content):
    """Return True if `content` contains one of the auto-generated markers."""
    return any(marker in content for marker in _AUTO_GENERATED_MARKERS)


# Every distinct copyright notice found so far (filled by extract_copyright_at).
copyrights = set()


def extract_copyright_at(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the lines of the file, without their trailing newlines.
        i: index of a line containing "Copyright".

    Returns:
        The index of the line that terminated the header (len(lines) if the
        header runs to the end of the file). This equals the `i` passed in
        when the terminator is on that very line; the caller is responsible
        for making progress in that case.

    A notice is only recorded when some text is left after the comment
    formatting has been stripped.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        line = lines[i]
        if is_hash_comment and len(line) == 0:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = _LEADING_STAR_RE.sub("", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in _BLANK_COMMENT_LINES:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The bounds checks matter: the
    # list can be empty or all-blank (e.g. a one-line "/* Copyright ... */").
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1

    notice = "\n".join(clean_lines[first:last])
    if notice:
        copyrights.add(notice)

    return i


def _read_text(path):
    """Return the text of `path`, decoded as UTF-8 or else ISO-8859-1.

    The file is read once, as bytes, and the handle is closed before
    decoding. A warning is issued when the UTF-8 decode fails.
    """
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        return raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        return raw.decode("iso-8859-1")


def do_file(path):
    """Collect every copyright notice found in the file at `path`.

    Short files (four lines or fewer) and auto-generated files are ignored;
    files with no "Copyright" text only produce a warning.
    """
    content = _read_text(path)
    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        line = lines[i]
        if "Copyright" in line and "@(#) Copyright" not in line:
            next_i = extract_copyright_at(lines, i)
            if next_i <= i:
                # The header was terminated on the "Copyright" line itself;
                # step past it, otherwise this loop would never finish.
                warn('copyright notice at line %d of "%s" is terminated on '
                     'the same line; skipping it' % (i + 1, path))
                next_i = i + 1
            i = next_i
        else:
            i += 1


def do_dir(path):
    """Run do_file on every interesting file below the directory `path`.

    ".git" directories are not entered; directories and files are visited
    in sorted order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours in-place changes to the directory list.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)


def format_notices(notices):
    """Return the NOTICE text for `notices`.

    The notices are emitted in sorted order, each followed by a blank line,
    the separator line and another blank line.
    """
    return "".join("%s\n\n%s\n\n" % (notice, NOTICE_SEPARATOR)
                   for notice in sorted(notices))


def main(args=None):
    """Scan the given files/directories and print the collected notices.

    Args:
        args: paths to scan; defaults to sys.argv[1:], or "." if that is empty.

    Returns:
        0, for use as the process exit status.
    """
    if args is None:
        args = sys.argv[1:]
    if len(args) == 0:
        args = ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    output = format_notices(copyrights).encode("utf-8")
    # Python 3 exposes the underlying byte stream as sys.stdout.buffer;
    # Python 2's sys.stdout accepts the encoded bytes directly.
    getattr(sys.stdout, "buffer", sys.stdout).write(output)
    return 0


if __name__ == "__main__":
    sys.exit(main())