Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Checks third-party licenses for the purposes of the Android WebView build.
      7 
      8 The Android tree includes a snapshot of Chromium in order to power the system
      9 WebView.  This tool checks that all code uses open-source licenses compatible
     10 with Android, and that we meet the requirements of those licenses. It can also
     11 be used to generate an Android NOTICE file for the third-party code.
     12 
     13 It makes use of src/tools/licenses.py and the README.chromium files on which
     14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
     15 which whitelists individual files which contain third-party code but which
     16 aren't in a third-party directory with a README.chromium file.
     17 """
     18 
     19 import glob
     20 import imp
     21 import multiprocessing
     22 import optparse
     23 import os
     24 import re
     25 import subprocess
     26 import sys
     27 import textwrap
     28 
     29 
     30 REPOSITORY_ROOT = os.path.abspath(os.path.join(
     31     os.path.dirname(__file__), '..', '..'))
     32 
     33 # Import third_party/PRESUBMIT.py via imp to avoid importing a random
     34 # PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
     35 sys.dont_write_bytecode = True
     36 third_party = \
     37   imp.load_source('PRESUBMIT', \
     38                   os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))
     39 
     40 sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
     41 import licenses
     42 
     43 import known_issues
     44 
     45 class InputApi(object):
     46   def __init__(self):
     47     self.re = re
     48 
     49 def GetIncompatibleDirectories():
     50   """Gets a list of third-party directories which use licenses incompatible
     51   with Android. This is used by the snapshot tool.
     52   Returns:
     53     A list of directories.
     54   """
     55 
     56   result = []
     57   for directory in _FindThirdPartyDirs():
     58     if directory in known_issues.KNOWN_ISSUES:
     59       result.append(directory)
     60       continue
     61     try:
     62       metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
     63                                    require_license_file=False)
     64     except licenses.LicenseError as e:
     65       print 'Got LicenseError while scanning ' + directory
     66       raise
     67     if metadata.get('License Android Compatible', 'no').upper() == 'YES':
     68       continue
     69     license = re.split(' [Ll]icenses?$', metadata['License'])[0]
     70     if not third_party.LicenseIsCompatibleWithAndroid(InputApi(), license):
     71       result.append(directory)
     72   return result
     73 
     74 def GetUnknownIncompatibleDirectories():
     75   """Gets a list of third-party directories which use licenses incompatible
     76   with Android which are not present in the known_issues.py file.
     77   This is used by the AOSP bot.
     78   Returns:
     79     A list of directories.
     80   """
     81   incompatible_directories = frozenset(GetIncompatibleDirectories())
     82   known_incompatible = []
     83   for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.iteritems():
     84     for exclude in exclude_list:
     85       if glob.has_magic(exclude):
     86         exclude_dirname = os.path.dirname(exclude)
     87         if glob.has_magic(exclude_dirname):
     88           print ('Exclude path %s contains an unexpected glob expression,' \
     89                  ' skipping.' % exclude)
     90         exclude = exclude_dirname
     91       known_incompatible.append(os.path.normpath(os.path.join(path, exclude)))
     92   known_incompatible = frozenset(known_incompatible)
     93   return incompatible_directories.difference(known_incompatible)
     94 
     95 
     96 class ScanResult(object):
     97   Ok, Warnings, Errors = range(3)
     98 
     99 # Needs to be a top-level function for multiprocessing
    100 def _FindCopyrights(files_to_scan):
    101   args = [os.path.join('android_webview', 'tools', 'find_copyrights.pl')]
    102   p = subprocess.Popen(
    103     args=args, cwd=REPOSITORY_ROOT,
    104     stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    105   lines = p.communicate(files_to_scan)[0].splitlines()
    106 
    107   offending_files = []
    108   allowed_copyrights = '^(?:\*No copyright\*' \
    109       '|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
    110       'All rights reserved.*)$'
    111   allowed_copyrights_re = re.compile(allowed_copyrights)
    112   for l in lines:
    113     entries = l.split('\t')
    114     if entries[1] == "GENERATED FILE":
    115       continue
    116     copyrights = entries[1].split(' / ')
    117     for c in copyrights:
    118       if c and not allowed_copyrights_re.match(c):
    119         offending_files.append(os.path.normpath(entries[0]))
    120         break
    121   return offending_files
    122 
    123 def _ShardString(s, delimiter, shard_len):
    124   result = []
    125   index = 0
    126   last_pos = 0
    127   for m in re.finditer(delimiter, s):
    128     index += 1
    129     if index % shard_len == 0:
    130       result.append(s[last_pos:m.end()])
    131       last_pos = m.end()
    132   if not index % shard_len == 0:
    133     result.append(s[last_pos:])
    134   return result
    135 
    136 def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
    137   """Checks that all files which are not in a listed third-party directory,
    138   and which do not use the standard Chromium license, are whitelisted.
    139   Args:
    140     excluded_dirs_list: The list of directories to exclude from scanning.
    141     whitelisted_files: The whitelist of files.
    142   Returns:
    143     ScanResult.Ok if all files with non-standard license headers are whitelisted
    144     and the whitelist contains no stale entries;
    145     ScanResult.Warnings if there are stale entries;
    146     ScanResult.Errors if new non-whitelisted entries found.
    147   """
    148 
    149   excluded_dirs_list = [d for d in excluded_dirs_list if not 'third_party' in d]
    150   # Using a common pattern for third-partyies makes the ignore regexp shorter
    151   excluded_dirs_list.append('third_party')
    152   # VCS dirs
    153   excluded_dirs_list.append('.git')
    154   excluded_dirs_list.append('.svn')
    155   # Build output
    156   excluded_dirs_list.append('out/Debug')
    157   excluded_dirs_list.append('out/Release')
    158   # 'Copyright' appears in license agreements
    159   excluded_dirs_list.append('chrome/app/resources')
    160   # Quickoffice js files from internal src used on buildbots. crbug.com/350472.
    161   excluded_dirs_list.append('chrome/browser/resources/chromeos/quickoffice')
    162   # This is a test output directory
    163   excluded_dirs_list.append('chrome/tools/test/reference_build')
    164   # blink style copy right headers.
    165   excluded_dirs_list.append('content/shell/renderer/test_runner')
    166   # blink style copy right headers.
    167   excluded_dirs_list.append('content/shell/tools/plugin')
    168   # This is tests directory, doesn't exist in the snapshot
    169   excluded_dirs_list.append('content/test/data')
    170   # This is a tests directory that doesn't exist in the shipped product.
    171   excluded_dirs_list.append('gin/test')
    172   # This is a test output directory
    173   excluded_dirs_list.append('data/dom_perf')
    174   # This is a tests directory that doesn't exist in the shipped product.
    175   excluded_dirs_list.append('tools/perf/page_sets')
    176   excluded_dirs_list.append('tools/perf/page_sets/tough_animation_cases')
    177   # Histogram tools, doesn't exist in the snapshot
    178   excluded_dirs_list.append('tools/histograms')
    179   # Swarming tools, doesn't exist in the snapshot
    180   excluded_dirs_list.append('tools/swarming_client')
    181   # Arm sysroot tools, doesn't exist in the snapshot
    182   excluded_dirs_list.append('arm-sysroot')
    183   # Data is not part of open source chromium, but are included on some bots.
    184   excluded_dirs_list.append('data')
    185   # This is not part of open source chromium, but are included on some bots.
    186   excluded_dirs_list.append('skia/tools/clusterfuzz-data')
    187 
    188   args = [os.path.join('android_webview', 'tools', 'find_files.pl'),
    189           '.'
    190           ] + excluded_dirs_list
    191   p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
    192   files_to_scan = p.communicate()[0]
    193 
    194   sharded_files_to_scan = _ShardString(files_to_scan, '\n', 2000)
    195   pool = multiprocessing.Pool()
    196   offending_files_chunks = pool.map_async(
    197       _FindCopyrights, sharded_files_to_scan).get(999999)
    198   pool.close()
    199   pool.join()
    200   # Flatten out the result
    201   offending_files = \
    202     [item for sublist in offending_files_chunks for item in sublist]
    203 
    204   unknown = set(offending_files) - set(whitelisted_files)
    205   if unknown:
    206     print 'The following files contain a third-party license but are not in ' \
    207           'a listed third-party directory and are not whitelisted. You must ' \
    208           'add the following files to the whitelist.\n%s' % \
    209           '\n'.join(sorted(unknown))
    210 
    211   stale = set(whitelisted_files) - set(offending_files)
    212   if stale:
    213     print 'The following files are whitelisted unnecessarily. You must ' \
    214           'remove the following files from the whitelist.\n%s' % \
    215           '\n'.join(sorted(stale))
    216   missing = [f for f in whitelisted_files if not os.path.exists(f)]
    217   if missing:
    218     print 'The following files are whitelisted, but do not exist.\n%s' % \
    219         '\n'.join(sorted(missing))
    220 
    221   if unknown:
    222     return ScanResult.Errors
    223   elif stale or missing:
    224     return ScanResult.Warnings
    225   else:
    226     return ScanResult.Ok
    227 
    228 
    229 def _ReadFile(path):
    230   """Reads a file from disk.
    231   Args:
    232     path: The path of the file to read, relative to the root of the repository.
    233   Returns:
    234     The contents of the file as a string.
    235   """
    236 
    237   return open(os.path.join(REPOSITORY_ROOT, path), 'rb').read()
    238 
    239 
    240 def _FindThirdPartyDirs():
    241   """Gets the list of third-party directories.
    242   Returns:
    243     The list of third-party directories.
    244   """
    245 
    246   # Please don't add here paths that have problems with license files,
    247   # as they will end up included in Android WebView snapshot.
    248   # Instead, add them into known_issues.py.
    249   prune_paths = [
    250     # Temporary until we figure out how not to check out quickoffice on the
    251     # Android license check bot. Tracked in crbug.com/350472.
    252     os.path.join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    253     # Placeholder directory, no third-party code.
    254     os.path.join('third_party', 'adobe'),
    255     # Apache 2.0 license. See
    256     # https://code.google.com/p/chromium/issues/detail?id=140478.
    257     os.path.join('third_party', 'bidichecker'),
    258     # Isn't checked out on clients
    259     os.path.join('third_party', 'gles2_conform'),
    260     # The llvm-build doesn't exist for non-clang builder
    261     os.path.join('third_party', 'llvm-build'),
    262     # Binaries doesn't apply to android
    263     os.path.join('third_party', 'widevine'),
    264     # third_party directories in this tree aren't actually third party, but
    265     # provide a way to shadow experimental buildfiles into those directories.
    266     os.path.join('build', 'secondary'),
    267     # Not shipped, Chromium code
    268     os.path.join('tools', 'swarming_client'),
    269   ]
    270   third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
    271   return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)
    272 
    273 
    274 def _Scan():
    275   """Checks that license meta-data is present for all third-party code and
    276      that all non third-party code doesn't contain external copyrighted code.
    277   Returns:
    278     ScanResult.Ok if everything is in order;
    279     ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
    280       entries)
    281     ScanResult.Errors otherwise.
    282   """
    283 
    284   third_party_dirs = _FindThirdPartyDirs()
    285 
    286   # First, check designated third-party directories using src/tools/licenses.py.
    287   all_licenses_valid = True
    288   for path in sorted(third_party_dirs):
    289     try:
    290       licenses.ParseDir(path, REPOSITORY_ROOT)
    291     except licenses.LicenseError, e:
    292       if not (path in known_issues.KNOWN_ISSUES):
    293         print 'Got LicenseError "%s" while scanning %s' % (e, path)
    294         all_licenses_valid = False
    295 
    296   # Second, check for non-standard license text.
    297   files_data = _ReadFile(os.path.join('android_webview', 'tools',
    298                                       'third_party_files_whitelist.txt'))
    299   whitelisted_files = []
    300   for line in files_data.splitlines():
    301     match = re.match(r'([^#\s]+)', line)
    302     if match:
    303       whitelisted_files.append(match.group(1))
    304   licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)
    305 
    306   return licenses_check if all_licenses_valid else ScanResult.Errors
    307 
    308 
    309 def GenerateNoticeFile():
    310   """Generates the contents of an Android NOTICE file for the third-party code.
    311   This is used by the snapshot tool.
    312   Returns:
    313     The contents of the NOTICE file.
    314   """
    315 
    316   third_party_dirs = _FindThirdPartyDirs()
    317 
    318   # Don't forget Chromium's LICENSE file
    319   content = [_ReadFile('LICENSE')]
    320 
    321   # We provide attribution for all third-party directories.
    322   # TODO(steveblock): Limit this to only code used by the WebView binary.
    323   for directory in sorted(third_party_dirs):
    324     metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
    325                                  require_license_file=False)
    326     license_file = metadata['License File']
    327     if license_file and license_file != licenses.NOT_SHIPPED:
    328       content.append(_ReadFile(license_file))
    329 
    330   return '\n'.join(content)
    331 
    332 
    333 def _ProcessIncompatibleResult(incompatible_directories):
    334   if incompatible_directories:
    335     print ("Incompatibly licensed directories found:\n" +
    336            "\n".join(sorted(incompatible_directories)))
    337     return ScanResult.Errors
    338   return ScanResult.Ok
    339 
    340 def main():
    341   class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    342     def format_description(self, description):
    343       paras = description.split('\n')
    344       formatted_paras = [textwrap.fill(para, self.width) for para in paras]
    345       return '\n'.join(formatted_paras) + '\n'
    346 
    347   parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
    348                                  usage='%prog [options]')
    349   parser.description = (__doc__ +
    350                        '\nCommands:\n' \
    351                        '  scan Check licenses.\n' \
    352                        '  notice Generate Android NOTICE file on stdout.\n' \
    353                        '  incompatible_directories Scan for incompatibly'
    354                        ' licensed directories.\n'
    355                        '  all_incompatible_directories Scan for incompatibly'
    356                        ' licensed directories (even those in'
    357                        ' known_issues.py).\n')
    358   (_, args) = parser.parse_args()
    359   if len(args) != 1:
    360     parser.print_help()
    361     return ScanResult.Errors
    362 
    363   if args[0] == 'scan':
    364     scan_result = _Scan()
    365     if scan_result == ScanResult.Ok:
    366       print 'OK!'
    367     return scan_result
    368   elif args[0] == 'notice':
    369     print GenerateNoticeFile()
    370     return ScanResult.Ok
    371   elif args[0] == 'incompatible_directories':
    372     return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
    373   elif args[0] == 'all_incompatible_directories':
    374     return _ProcessIncompatibleResult(GetIncompatibleDirectories())
    375   parser.print_help()
    376   return ScanResult.Errors
    377 
    378 if __name__ == '__main__':
    379   sys.exit(main())
    380