#!/usr/bin/python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Checks third-party licenses for the purposes of the Android WebView build.

The Android tree includes a snapshot of Chromium in order to power the system
WebView. This tool checks that all code uses open-source licenses compatible
with Android, and that we meet the requirements of those licenses. It can also
be used to generate an Android NOTICE file for the third-party code.

It makes use of src/tools/licenses.py and the README.chromium files on which
it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists individual files which contain third-party code but which
aren't in a third-party directory with a README.chromium file.
"""

import glob
import imp
import multiprocessing
import optparse
import os
import re
import subprocess
import sys
import textwrap


REPOSITORY_ROOT = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', '..'))

# Import third_party/PRESUBMIT.py via imp to avoid importing a random
# PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
sys.dont_write_bytecode = True
third_party = \
  imp.load_source('PRESUBMIT', \
                  os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))

sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
import licenses

import known_issues


class InputApi(object):
  """Minimal stand-in handed to third_party.LicenseIsCompatibleWithAndroid.

  It only exposes the 're' module as an attribute.
  NOTE(review): presumably this mimics the presubmit framework's input_api
  object; confirm against third_party/PRESUBMIT.py.
  """

  def __init__(self):
    self.re = re


def GetIncompatibleDirectories():
  """Gets a list of third-party directories which use licenses incompatible
  with Android. This is used by the snapshot tool.
  Directories listed in known_issues.KNOWN_ISSUES are always reported without
  being parsed.
  Returns:
    A list of directories.
  Raises:
    licenses.LicenseError: if the metadata of a directory cannot be parsed.
  """

  result = []
  for directory in _FindThirdPartyDirs():
    if directory in known_issues.KNOWN_ISSUES:
      result.append(directory)
      continue
    try:
      metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                   require_license_file=False)
    except licenses.LicenseError:
      # Say which directory failed, then let the error propagate.
      print('Got LicenseError while scanning ' + directory)
      raise
    # An explicit opt-in in the metadata overrides the license name check.
    if metadata.get('License Android Compatible', 'no').upper() == 'YES':
      continue
    # Strip a trailing " license" / " licenses" suffix before the check.
    license_name = re.split(' [Ll]icenses?$', metadata['License'])[0]
    if not third_party.LicenseIsCompatibleWithAndroid(InputApi(),
                                                      license_name):
      result.append(directory)
  return result


def GetUnknownIncompatibleDirectories():
  """Gets the third-party directories which use licenses incompatible
  with Android which are not present in the known_issues.py file.
  This is used by the AOSP bot.
  Returns:
    A frozenset of directories.
  """
  incompatible_directories = frozenset(GetIncompatibleDirectories())
  known_incompatible = []
  for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.items():
    for exclude in exclude_list:
      if glob.has_magic(exclude):
        # A glob in the last path component means "files in this directory",
        # so the directory part is what is known to be incompatible.
        exclude_dirname = os.path.dirname(exclude)
        if glob.has_magic(exclude_dirname):
          print('Exclude path %s contains an unexpected glob expression,' \
                ' skipping.' % exclude)
          # Actually skip the entry, as the message says. A path that still
          # contains glob characters is compared literally below, so it would
          # only match a directory with those characters in its name.
          continue
        exclude = exclude_dirname
      known_incompatible.append(os.path.normpath(os.path.join(path, exclude)))
  known_incompatible = frozenset(known_incompatible)
  return incompatible_directories.difference(known_incompatible)


class ScanResult(object):
  """Result codes of a scan; main() returns them as the process exit code."""
  Ok, Warnings, Errors = range(3)


# Needs to be a top-level function for multiprocessing
def _FindCopyrights(files_to_scan):
  """Runs find_copyrights.pl over a set of files and filters its output.
  Args:
    files_to_scan: A string with the file names to scan; it is written to the
      stdin of find_copyrights.pl.
  Returns:
    The list of normalized paths of files having at least one copyright entry
    which is neither '*No copyright*' nor a Chromium Authors copyright.
  """
  args = [os.path.join('android_webview', 'tools', 'find_copyrights.pl')]
  p = subprocess.Popen(
      args=args, cwd=REPOSITORY_ROOT,
      stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  lines = p.communicate(files_to_scan)[0].splitlines()

  offending_files = []
  allowed_copyrights = r'^(?:\*No copyright\*' \
      r'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
      r'All rights reserved.*)$'
  allowed_copyrights_re = re.compile(allowed_copyrights)
  for line in lines:
    # Each line is indexed as '<path>\t<copyrights>', the copyrights being
    # separated by ' / '.
    # NOTE(review): a line without a tab raises IndexError here; confirm that
    # find_copyrights.pl never emits one.
    entries = line.split('\t')
    if entries[1] == "GENERATED FILE":
      continue
    copyrights = entries[1].split(' / ')
    for c in copyrights:
      if c and not allowed_copyrights_re.match(c):
        offending_files.append(os.path.normpath(entries[0]))
        break
  return offending_files


def _ShardString(s, delimiter, shard_len):
  """Splits a string into shards of shard_len delimited items each.
  Args:
    s: The string to split.
    delimiter: A regular expression matching the item delimiter.
    shard_len: The number of delimiters per shard.
  Returns:
    A list of consecutive substrings of s which concatenate back to s. Every
    shard but the last one ends right after its shard_len-th delimiter; the
    last one holds whatever is left.
  """
  result = []
  index = 0
  last_pos = 0
  for m in re.finditer(delimiter, s):
    index += 1
    if index % shard_len == 0:
      result.append(s[last_pos:m.end()])
      last_pos = m.end()
  # Keep the remainder whenever there is one. Checking the delimiter count
  # instead would drop a trailing item which isn't followed by a delimiter
  # when the count happens to be a multiple of shard_len (or zero).
  if last_pos < len(s):
    result.append(s[last_pos:])
  return result


def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.
  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
    whitelisted_files: The whitelist of files.
  Returns:
    ScanResult.Ok if all files with non-standard license headers are whitelisted
    and the whitelist contains no stale entries;
    ScanResult.Warnings if there are stale entries;
    ScanResult.Errors if new non-whitelisted entries found.
  """

  # Work on a copy, so the caller's list is left untouched.
  excluded_dirs_list = [d for d in excluded_dirs_list if 'third_party' not in d]
  excluded_dirs_list.extend([
      # Using a common pattern for third parties makes the ignore regexp
      # shorter
      'third_party',
      # VCS dirs
      '.git',
      '.svn',
      # Build output
      'out/Debug',
      'out/Release',
      # 'Copyright' appears in license agreements
      'chrome/app/resources',
      # Quickoffice js files from internal src used on buildbots.
      # crbug.com/350472.
      'chrome/browser/resources/chromeos/quickoffice',
      # This is a test output directory
      'chrome/tools/test/reference_build',
      # blink style copyright headers.
      'content/shell/renderer/test_runner',
      # blink style copyright headers.
      'content/shell/tools/plugin',
      # This is a tests directory, doesn't exist in the snapshot
      'content/test/data',
      # This is a tests directory that doesn't exist in the shipped product.
      'gin/test',
      # This is a test output directory
      'data/dom_perf',
      # This is a tests directory that doesn't exist in the shipped product.
      'tools/perf/page_sets',
      'tools/perf/page_sets/tough_animation_cases',
      # Histogram tools, doesn't exist in the snapshot
      'tools/histograms',
      # Swarming tools, doesn't exist in the snapshot
      'tools/swarming_client',
      # Arm sysroot tools, doesn't exist in the snapshot
      'arm-sysroot',
      # Data is not part of open source chromium, but are included on some
      # bots.
      'data',
      # This is not part of open source chromium, but are included on some
      # bots.
      'skia/tools/clusterfuzz-data',
  ])

  args = [os.path.join('android_webview', 'tools', 'find_files.pl'),
          '.'
          ] + excluded_dirs_list
  p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
  files_to_scan = p.communicate()[0]

  # Scan the files in parallel, 2000 file names per task.
  sharded_files_to_scan = _ShardString(files_to_scan, '\n', 2000)
  pool = multiprocessing.Pool()
  # NOTE(review): presumably the huge timeout is there so that the wait stays
  # interruptible from the keyboard; confirm before simplifying to map().
  offending_files_chunks = pool.map_async(
      _FindCopyrights, sharded_files_to_scan).get(999999)
  pool.close()
  pool.join()
  # Flatten out the result
  offending_files = \
    [item for sublist in offending_files_chunks for item in sublist]

  unknown = set(offending_files) - set(whitelisted_files)
  if unknown:
    print('The following files contain a third-party license but are not in ' \
          'a listed third-party directory and are not whitelisted. You must ' \
          'add the following files to the whitelist.\n%s' % \
          '\n'.join(sorted(unknown)))

  stale = set(whitelisted_files) - set(offending_files)
  if stale:
    print('The following files are whitelisted unnecessarily. You must ' \
          'remove the following files from the whitelist.\n%s' % \
          '\n'.join(sorted(stale)))
  # Whitelist entries are relative to the repository root (they are compared
  # with the output of scripts run from there), so resolve them against it
  # rather than against the current working directory.
  missing = [f for f in whitelisted_files
             if not os.path.exists(os.path.join(REPOSITORY_ROOT, f))]
  if missing:
    print('The following files are whitelisted, but do not exist.\n%s' % \
          '\n'.join(sorted(missing)))

  if unknown:
    return ScanResult.Errors
  elif stale or missing:
    return ScanResult.Warnings
  else:
    return ScanResult.Ok


def _ReadFile(path):
  """Reads a file from disk.
  Args:
    path: The path of the file to read, relative to the root of the repository.
  Returns:
    The contents of the file as a string.
  """

  # Close the file deterministically instead of relying on garbage collection.
  with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
    return f.read()


def _FindThirdPartyDirs():
  """Gets the list of third-party directories.
  Returns:
    The list of third-party directories, as found by
    licenses.FindThirdPartyDirs and filtered by licenses.FilterDirsWithFiles.
  """

  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  prune_paths = [
    # Temporary until we figure out how not to check out quickoffice on the
    # Android license check bot. Tracked in crbug.com/350472.
    os.path.join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    # Placeholder directory, no third-party code.
    os.path.join('third_party', 'adobe'),
    # Apache 2.0 license. See
    # https://code.google.com/p/chromium/issues/detail?id=140478.
    os.path.join('third_party', 'bidichecker'),
    # Isn't checked out on clients
    os.path.join('third_party', 'gles2_conform'),
    # The llvm-build doesn't exist for non-clang builder
    os.path.join('third_party', 'llvm-build'),
    # Binaries doesn't apply to android
    os.path.join('third_party', 'widevine'),
    # third_party directories in this tree aren't actually third party, but
    # provide a way to shadow experimental buildfiles into those directories.
    os.path.join('build', 'secondary'),
    # Not shipped, Chromium code
    os.path.join('tools', 'swarming_client'),
  ]
  third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)


def _Scan():
  """Checks that license meta-data is present for all third-party code and
  that all non third-party code doesn't contain external copyrighted code.
  Returns:
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries)
    ScanResult.Errors otherwise.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # First, check designated third-party directories using src/tools/licenses.py.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      # Errors in directories listed as known issues are tolerated.
      if not (path in known_issues.KNOWN_ISSUES):
        print('Got LicenseError "%s" while scanning %s' % (e, path))
        all_licenses_valid = False

  # Second, check for non-standard license text.
  files_data = _ReadFile(os.path.join('android_webview', 'tools',
                                      'third_party_files_whitelist.txt'))
  whitelisted_files = []
  for line in files_data.splitlines():
    # Take the leading run of characters which are neither whitespace nor
    # '#'; lines starting with either of those yield no entry.
    match = re.match(r'([^#\s]+)', line)
    if match:
      whitelisted_files.append(match.group(1))
  licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)

  return licenses_check if all_licenses_valid else ScanResult.Errors


def GenerateNoticeFile():
  """Generates the contents of an Android NOTICE file for the third-party code.
  This is used by the snapshot tool.
  Returns:
    The contents of the NOTICE file.
  """

  third_party_dirs = _FindThirdPartyDirs()

  # Don't forget Chromium's LICENSE file
  content = [_ReadFile('LICENSE')]

  # We provide attribution for all third-party directories.
  # TODO(steveblock): Limit this to only code used by the WebView binary.
  for directory in sorted(third_party_dirs):
    metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                 require_license_file=False)
    license_file = metadata['License File']
    if license_file and license_file != licenses.NOT_SHIPPED:
      content.append(_ReadFile(license_file))

  return '\n'.join(content)


def _ProcessIncompatibleResult(incompatible_directories):
  """Prints the incompatible directories, if any.
  Args:
    incompatible_directories: A collection of directory paths.
  Returns:
    ScanResult.Errors if the collection is not empty, ScanResult.Ok otherwise.
  """
  if incompatible_directories:
    print("Incompatibly licensed directories found:\n" +
          "\n".join(sorted(incompatible_directories)))
    return ScanResult.Errors
  return ScanResult.Ok


def main():
  """Runs the command given as the only positional argument.
  Returns:
    A ScanResult value, which is used as the process exit code.
    ScanResult.Errors is also returned, after printing the help, when the
    command is missing or unknown.
  """

  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    """Help formatter which fills each line of the description on its own,
    so that the line breaks of the description are kept."""

    def format_description(self, description):
      paras = description.split('\n')
      formatted_paras = [textwrap.fill(para, self.width) for para in paras]
      return '\n'.join(formatted_paras) + '\n'

  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.description = (__doc__ +
                        '\nCommands:\n' \
                        ' scan Check licenses.\n' \
                        ' notice Generate Android NOTICE file on stdout.\n' \
                        ' incompatible_directories Scan for incompatibly'
                        ' licensed directories.\n'
                        ' all_incompatible_directories Scan for incompatibly'
                        ' licensed directories (even those in'
                        ' known_issues.py).\n')
  (_, args) = parser.parse_args()
  if len(args) != 1:
    parser.print_help()
    return ScanResult.Errors

  if args[0] == 'scan':
    scan_result = _Scan()
    if scan_result == ScanResult.Ok:
      print('OK!')
    return scan_result
  elif args[0] == 'notice':
    print(GenerateNoticeFile())
    return ScanResult.Ok
  elif args[0] == 'incompatible_directories':
    return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
  elif args[0] == 'all_incompatible_directories':
    return _ProcessIncompatibleResult(GetIncompatibleDirectories())
  parser.print_help()
  return ScanResult.Errors


if __name__ == '__main__':
  sys.exit(main())