#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
#
# The script walks the given directories (default: "."), pulls every
# copyright header out of the source files it finds, de-duplicates them and
# prints them to stdout separated by a dashed rule. Warnings go to stderr.

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

# File-name suffixes (compared against the lower-cased path) that never carry
# a copyright header we want to collect.
_UNINTERESTING_SUFFIXES = (
    ".mk", ".py", ".pyc", ".txt", ".3",
    "/notice", "/readme", "/caveats",
    "/tzdata", "/zoneinfo/generate",
)

# Substrings that mark a file as machine-generated.
_AUTO_GENERATED_MARKERS = (
    "Generated by gensyscalls.py",
    "generated by genserv.py",
    "This header was automatically generated from a Linux kernel header",
)

# Substrings that terminate a copyright header: version-control id lines and
# "where this came from" notes that follow the license text.
_HEADER_TERMINATORS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Comment lines that are pure decoration and are trimmed from the header tail.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

_SEPARATOR = '-------------------------------------------------------------------'


def IsUninteresting(path):
    """Return True if `path` names a file that should not be scanned.

    The comparison is case-insensitive and based purely on how the path ends.
    """
    return path.lower().endswith(_UNINTERESTING_SUFFIXES)


def IsAutoGenerated(content):
    """Return True if `content` contains one of the known generator banners."""
    return any(marker in content for marker in _AUTO_GENERATED_MARKERS)


# Every distinct copyright header found so far, across all scanned files.
copyrights = set()


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    lines: the file's content split into lines (no trailing newlines).
    i:     index of a line that contains the word "Copyright".

    Returns the index of the line that terminated the header (or len(lines)
    if the file ended first); the caller resumes scanning after it.

    If nothing is left once comment decoration is stripped (for example a
    one-line "/* Copyright ... */" comment, whose own line is the terminator),
    nothing is recorded instead of raising IndexError.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # For C-style comments, rewind to the line just after the one with "/*".
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in _HEADER_TERMINATORS):
            break
        i += 1

    end = i

    # Trim trailing cruft.
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail; the emptiness checks keep this
    # from indexing into an empty list.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i


def _ReadText(path):
    """Return the content of `path` as text.

    The file is decoded as UTF-8; if that fails a warning is written to
    stderr and ISO-8859-1 (which accepts any byte sequence) is used instead.
    I/O errors are not swallowed.
    """
    with open(path, 'rb') as f:
        raw = f.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
        return raw.decode('iso-8859-1')


def _ScanFile(path):
    """Collect every copyright header in the file at `path`."""
    content = _ReadText(path)
    lines = content.split("\n")

    # Too short to hold a real header.
    if len(lines) <= 4:
        return

    if IsAutoGenerated(content):
        return

    if "Copyright" not in content:
        # Public-domain files legitimately have no notice; anything else
        # deserves a warning.
        if "public domain" not in content.lower():
            sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
        return

    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            i = ExtractCopyrightAt(lines, i)
        i += 1


def main(args):
    """Scan each directory in `args` (default ".") and print the notices."""
    if len(args) == 0:
        args = ["."]

    for arg in args:
        sys.stderr.write('Searching for source files in "%s"...\n' % arg)

        for directory, sub_directories, filenames in os.walk(arg):
            if ".git" in sub_directories:
                sub_directories.remove(".git")
            # os.walk only honours changes made to this list in place, so
            # sort it in place rather than rebinding the name.
            sub_directories.sort()

            for filename in sorted(filenames):
                path = os.path.join(directory, filename)
                if IsUninteresting(path):
                    continue
                _ScanFile(path)

    # Emit UTF-8 bytes directly: Python 3 exposes the byte stream as
    # sys.stdout.buffer, Python 2's sys.stdout already accepts bytes.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for notice in sorted(copyrights):
        out.write((notice + "\n\n" + _SEPARATOR + "\n\n").encode('utf-8'))


if __name__ == "__main__":
    main(sys.argv[1:])
    sys.exit(0)