#!/usr/bin/python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Checks third-party licenses for the purposes of the Android WebView build.

The Android tree includes a snapshot of Chromium in order to power the system
WebView. This tool checks that all code uses open-source licenses compatible
with Android, and that we meet the requirements of those licenses. It can also
be used to generate an Android NOTICE file for the third-party code.

It makes use of src/tools/licenses.py and the README.chromium files on which
it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists individual files which contain third-party code but which
aren't in a third-party directory with a README.chromium file.
"""

import optparse
import os
import re
import subprocess
import sys
import textwrap


REPOSITORY_ROOT = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', '..'))

# licenses.py lives in <root>/tools, which is not on the default module path.
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
import licenses

import known_issues


def GetIncompatibleDirectories():
  """Gets a list of third-party directories which use licenses incompatible
  with Android. This is used by the snapshot tool.

  A directory is reported when it is listed in known_issues.KNOWN_ISSUES, or
  when any of the licenses named in its metadata fails to match the whitelist
  below (unless the metadata marks it 'License Android Compatible: yes').

  Returns:
    A list of directories.
  Raises:
    licenses.LicenseError: re-raised (after printing the directory name) if
      the metadata of a directory cannot be parsed.
  """

  whitelist = [
    r'Apache( Version)? 2(\.0)?',
    r'(New )?([23]-Clause )?BSD( [23]-Clause)?( with advertising clause)?',
    r'L?GPL ?v?2(\.[01])?( or later)?',
    r'MIT(/X11)?(-like)?',
    r'MPL 1\.1 ?/ ?GPL 2(\.0)? ?/ ?LGPL 2\.1',
    r'MPL 2(\.0)?',
    r'Microsoft Limited Public License',
    r'Microsoft Permissive License',
    r'Public Domain',
    r'SGI Free Software License B',
    r'X11',
  ]
  # Compiled once; every license token of every directory is matched against
  # the same anchored, case-insensitive alternation.
  whitelist_re = re.compile('^(%s)$' % '|'.join(whitelist), re.IGNORECASE)

  result = []
  for directory in _FindThirdPartyDirs():
    if directory in known_issues.KNOWN_ISSUES:
      result.append(directory)
      continue
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False)
    except licenses.LicenseError:
      print('Got LicenseError while scanning ' + directory)
      raise
    if metadata.get('License Android Compatible', 'no').upper() == 'YES':
      continue
    # Drop a trailing " license"/" licenses" word, then split the remainder
    # into the individual license names it lists.
    license_text = re.split(' [Ll]icenses?$', metadata['License'])[0]
    tokens = [part.strip()
              for part in re.split(' and |,', license_text) if part]
    # A single non-whitelisted license makes the whole directory incompatible.
    if any(not whitelist_re.match(token) for token in tokens):
      result.append(directory)
  return result


class ScanResult(object):
  """Outcome codes of a scan; also used as the process exit status by main()."""
  Ok, Warnings, Errors = range(3)


def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.

  Runs android_webview/tools/find_copyrights.pl from the repository root and
  parses its output: one line per file, made of the file path, a tab, and
  either the marker 'GENERATED FILE' or the copyright strings found in the
  file separated by ' / '.

  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
      The list passed in is not modified.
    whitelisted_files: The whitelist of files.
  Returns:
    ScanResult.Ok if all files with non-standard license headers are whitelisted
    and the whitelist contains no stale entries;
    ScanResult.Warnings if there are stale entries;
    ScanResult.Errors if new non-whitelisted entries found.
  """

  excluded_dirs_list = [d for d in excluded_dirs_list if 'third_party' not in d]
  # Using a common pattern for third-parties makes the ignore regexp shorter
  excluded_dirs_list.append('third_party')
  # VCS dirs
  excluded_dirs_list.append('.git')
  excluded_dirs_list.append('.svn')
  # Build output
  excluded_dirs_list.append('out/Debug')
  excluded_dirs_list.append('out/Release')
  # 'Copyright' appears in license agreements
  excluded_dirs_list.append('chrome/app/resources')
  # This is a test output directory
  excluded_dirs_list.append('chrome/tools/test/reference_build')
  # This is tests directory, doesn't exist in the snapshot
  excluded_dirs_list.append('content/test/data')
  # This is a tests directory that doesn't exist in the shipped product.
  excluded_dirs_list.append('gin/test')
  # This is a test output directory
  excluded_dirs_list.append('data/dom_perf')
  # Histogram tools, doesn't exist in the snapshot
  excluded_dirs_list.append('tools/histograms')
  # Swarming tools, doesn't exist in the snapshot
  excluded_dirs_list.append('tools/swarming_client')
  # Arm sysroot tools, doesn't exist in the snapshot
  excluded_dirs_list.append('arm-sysroot')
  # Data is not part of open source chromium, but are included on some bots.
  excluded_dirs_list.append('data')

  args = ['android_webview/tools/find_copyrights.pl',
          '.'
          ] + excluded_dirs_list
  p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
  lines = p.communicate()[0].splitlines()

  offending_files = []
  # A copyright string is acceptable if it is the "no copyright" marker or
  # the standard Chromium Authors notice.
  allowed_copyrights = (r'^(?:\*No copyright\*'
                        r'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
                        r'All rights reserved.*)$')
  allowed_copyrights_re = re.compile(allowed_copyrights)
  for line in lines:
    entries = line.split('\t')
    if entries[1] == "GENERATED FILE":
      continue
    copyrights = entries[1].split(' / ')
    # One unexpected copyright string is enough to flag the file.
    if any(c and not allowed_copyrights_re.match(c) for c in copyrights):
      offending_files.append(os.path.normpath(entries[0]))

  unknown = set(offending_files) - set(whitelisted_files)
  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n%s' %
          '\n'.join(sorted(unknown)))

  stale = set(whitelisted_files) - set(offending_files)
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          ' remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  if unknown:
    return ScanResult.Errors
  elif stale:
    return ScanResult.Warnings
  else:
    return ScanResult.Ok


def _ReadFile(path):
  """Reads a file from disk.
  Args:
    path: The path of the file to read, relative to the root of the repository.
  Returns:
    The contents of the file as a string.
  """

  # The context manager closes the handle even if read() raises.
  with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
    return f.read()


def _FindThirdPartyDirs():
  """Gets the list of third-party directories.
  Returns:
    The list of third-party directories.
  """

  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  prune_paths = [
    # Placeholder directory, no third-party code.
    os.path.join('third_party', 'adobe'),
    # Apache 2.0 license. See
    # https://code.google.com/p/chromium/issues/detail?id=140478.
    os.path.join('third_party', 'bidichecker'),
    # Isn't checked out on clients
    os.path.join('third_party', 'gles2_conform'),
    # The llvm-build doesn't exist for non-clang builder
    os.path.join('third_party', 'llvm-build'),
    # Binaries doesn't apply to android
    os.path.join('third_party', 'widevine'),
    # third_party directories in this tree aren't actually third party, but
    # provide a way to shadow experimental buildfiles into those directories.
    os.path.join('tools', 'gn', 'secondary'),
    # Not shipped, Chromium code
    os.path.join('tools', 'swarming_client'),
  ]
  third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)


def _Scan():
  """Checks that license meta-data is present for all third-party code and
  that all non third-party code doesn't contain external copyrighted code.
  Returns:
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries)
    ScanResult.Errors otherwise.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      # Directories listed in known_issues are allowed to fail parsing.
      if path not in known_issues.KNOWN_ISSUES:
        print('Got LicenseError "%s" while scanning %s' % (e, path))
        all_licenses_valid = False

  # Second, check for non-standard license text.
  files_data = _ReadFile(os.path.join('android_webview', 'tools',
                                      'third_party_files_whitelist.txt'))
  whitelisted_files = []
  for line in files_data.splitlines():
    # Take the leading run of characters up to the first '#' or whitespace;
    # blank lines and lines starting with '#' or whitespace yield no match.
    match = re.match(r'([^#\s]+)', line)
    if match:
      whitelisted_files.append(match.group(1))
  licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)

  return licenses_check if all_licenses_valid else ScanResult.Errors


def GenerateNoticeFile():
  """Generates the contents of an Android NOTICE file for the third-party code.
  This is used by the snapshot tool.
  Returns:
    The contents of the NOTICE file.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # Don't forget Chromium's LICENSE file
  content = [_ReadFile('LICENSE')]

  # We provide attribution for all third-party directories.
  # TODO(steveblock): Limit this to only code used by the WebView binary.
  for directory in sorted(third_party_dirs):
    metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                 require_license_file=False)
    license_file = metadata['License File']
    if license_file and license_file != licenses.NOT_SHIPPED:
      content.append(_ReadFile(license_file))

  return '\n'.join(content)


def main():
  """Parses the command line and runs the 'scan' or 'notice' command.
  Returns:
    A ScanResult value, used as the process exit status: the scan result for
    'scan', ScanResult.Ok for 'notice', and ScanResult.Errors (after printing
    the help text) for a missing, extra or unknown command.
  """

  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    """Help formatter that wraps each line of the description separately."""

    def format_description(self, description):
      paras = description.split('\n')
      formatted_paras = [textwrap.fill(para, self.width) for para in paras]
      return '\n'.join(formatted_paras) + '\n'

  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.description = (__doc__ +
                        '\nCommands:\n'
                        ' scan Check licenses.\n'
                        ' notice Generate Android NOTICE file on stdout')
  (_options, args) = parser.parse_args()
  if len(args) != 1:
    parser.print_help()
    return ScanResult.Errors

  if args[0] == 'scan':
    scan_result = _Scan()
    if scan_result == ScanResult.Ok:
      print('OK!')
    return scan_result
  elif args[0] == 'notice':
    print(GenerateNoticeFile())
    return ScanResult.Ok

  parser.print_help()
  return ScanResult.Errors


if __name__ == '__main__':
  sys.exit(main())