Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Checks third-party licenses for the purposes of the Android WebView build.
      7 
      8 The Android tree includes a snapshot of Chromium in order to power the system
      9 WebView.  This tool checks that all code uses open-source licenses compatible
     10 with Android, and that we meet the requirements of those licenses. It can also
     11 be used to generate an Android NOTICE file for the third-party code.
     12 
     13 It makes use of src/tools/licenses.py and the README.chromium files on which
     14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
     15 which whitelists individual files which contain third-party code but which
     16 aren't in a third-party directory with a README.chromium file.
     17 """
     18 
     19 import glob
     20 import imp
     21 import optparse
     22 import os
     23 import re
     24 import subprocess
     25 import sys
     26 import textwrap
     27 
     28 
     29 REPOSITORY_ROOT = os.path.abspath(os.path.join(
     30     os.path.dirname(__file__), '..', '..'))
     31 
     32 # Import third_party/PRESUBMIT.py via imp to avoid importing a random
     33 # PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
     34 sys.dont_write_bytecode = True
     35 third_party = \
     36   imp.load_source('PRESUBMIT', \
     37                   os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))
     38 
     39 sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
     40 import licenses
     41 
     42 import known_issues
     43 
     44 class InputApi(object):
     45   def __init__(self):
     46     self.re = re
     47 
     48 def GetIncompatibleDirectories():
     49   """Gets a list of third-party directories which use licenses incompatible
     50   with Android. This is used by the snapshot tool.
     51   Returns:
     52     A list of directories.
     53   """
     54 
     55   result = []
     56   for directory in _FindThirdPartyDirs():
     57     if directory in known_issues.KNOWN_ISSUES:
     58       result.append(directory)
     59       continue
     60     try:
     61       metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
     62                                    require_license_file=False)
     63     except licenses.LicenseError as e:
     64       print 'Got LicenseError while scanning ' + directory
     65       raise
     66     if metadata.get('License Android Compatible', 'no').upper() == 'YES':
     67       continue
     68     license = re.split(' [Ll]icenses?$', metadata['License'])[0]
     69     if not third_party.LicenseIsCompatibleWithAndroid(InputApi(), license):
     70       result.append(directory)
     71   return result
     72 
     73 def GetUnknownIncompatibleDirectories():
     74   """Gets a list of third-party directories which use licenses incompatible
     75   with Android which are not present in the known_issues.py file.
     76   This is used by the AOSP bot.
     77   Returns:
     78     A list of directories.
     79   """
     80   incompatible_directories = frozenset(GetIncompatibleDirectories())
     81   known_incompatible = []
     82   for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.iteritems():
     83     for exclude in exclude_list:
     84       if glob.has_magic(exclude):
     85         exclude_dirname = os.path.dirname(exclude)
     86         if glob.has_magic(exclude_dirname):
     87           print ('Exclude path %s contains an unexpected glob expression,' \
     88                  ' skipping.' % exclude)
     89         exclude = exclude_dirname
     90       known_incompatible.append(os.path.normpath(os.path.join(path, exclude)))
     91   known_incompatible = frozenset(known_incompatible)
     92   return incompatible_directories.difference(known_incompatible)
     93 
     94 
     95 class ScanResult(object):
     96   Ok, Warnings, Errors = range(3)
     97 
     98 def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
     99   """Checks that all files which are not in a listed third-party directory,
    100   and which do not use the standard Chromium license, are whitelisted.
    101   Args:
    102     excluded_dirs_list: The list of directories to exclude from scanning.
    103     whitelisted_files: The whitelist of files.
    104   Returns:
    105     ScanResult.Ok if all files with non-standard license headers are whitelisted
    106     and the whitelist contains no stale entries;
    107     ScanResult.Warnings if there are stale entries;
    108     ScanResult.Errors if new non-whitelisted entries found.
    109   """
    110 
    111   excluded_dirs_list = [d for d in excluded_dirs_list if not 'third_party' in d]
    112   # Using a common pattern for third-partyies makes the ignore regexp shorter
    113   excluded_dirs_list.append('third_party')
    114   # VCS dirs
    115   excluded_dirs_list.append('.git')
    116   excluded_dirs_list.append('.svn')
    117   # Build output
    118   excluded_dirs_list.append('out/Debug')
    119   excluded_dirs_list.append('out/Release')
    120   # 'Copyright' appears in license agreements
    121   excluded_dirs_list.append('chrome/app/resources')
    122   # Quickoffice js files from internal src used on buildbots. crbug.com/350472.
    123   excluded_dirs_list.append('chrome/browser/resources/chromeos/quickoffice')
    124   # This is a test output directory
    125   excluded_dirs_list.append('chrome/tools/test/reference_build')
    126   # blink style copy right headers.
    127   excluded_dirs_list.append('content/shell/renderer/test_runner')
    128   # blink style copy right headers.
    129   excluded_dirs_list.append('content/shell/tools/plugin')
    130   # This is tests directory, doesn't exist in the snapshot
    131   excluded_dirs_list.append('content/test/data')
    132   # This is a tests directory that doesn't exist in the shipped product.
    133   excluded_dirs_list.append('gin/test')
    134   # This is a test output directory
    135   excluded_dirs_list.append('data/dom_perf')
    136   # This is a tests directory that doesn't exist in the shipped product.
    137   excluded_dirs_list.append('tools/perf/page_sets')
    138   excluded_dirs_list.append('tools/perf/page_sets/tough_animation_cases')
    139   # Histogram tools, doesn't exist in the snapshot
    140   excluded_dirs_list.append('tools/histograms')
    141   # Swarming tools, doesn't exist in the snapshot
    142   excluded_dirs_list.append('tools/swarming_client')
    143   # Arm sysroot tools, doesn't exist in the snapshot
    144   excluded_dirs_list.append('arm-sysroot')
    145   # Data is not part of open source chromium, but are included on some bots.
    146   excluded_dirs_list.append('data')
    147   # This is not part of open source chromium, but are included on some bots.
    148   excluded_dirs_list.append('skia/tools/clusterfuzz-data')
    149 
    150   args = ['android_webview/tools/find_copyrights.pl',
    151           '.'
    152           ] + excluded_dirs_list
    153   p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
    154   lines = p.communicate()[0].splitlines()
    155 
    156   offending_files = []
    157   allowed_copyrights = '^(?:\*No copyright\*' \
    158       '|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
    159       'All rights reserved.*)$'
    160   allowed_copyrights_re = re.compile(allowed_copyrights)
    161   for l in lines:
    162     entries = l.split('\t')
    163     if entries[1] == "GENERATED FILE":
    164       continue
    165     copyrights = entries[1].split(' / ')
    166     for c in copyrights:
    167       if c and not allowed_copyrights_re.match(c):
    168         offending_files.append(os.path.normpath(entries[0]))
    169         break
    170 
    171   unknown = set(offending_files) - set(whitelisted_files)
    172   if unknown:
    173     print 'The following files contain a third-party license but are not in ' \
    174           'a listed third-party directory and are not whitelisted. You must ' \
    175           'add the following files to the whitelist.\n%s' % \
    176           '\n'.join(sorted(unknown))
    177 
    178   stale = set(whitelisted_files) - set(offending_files)
    179   if stale:
    180     print 'The following files are whitelisted unnecessarily. You must ' \
    181           'remove the following files from the whitelist.\n%s' % \
    182           '\n'.join(sorted(stale))
    183   missing = [f for f in whitelisted_files if not os.path.exists(f)]
    184   if missing:
    185     print 'The following files are whitelisted, but do not exist.\n%s' % \
    186         '\n'.join(sorted(missing))
    187 
    188   if unknown:
    189     return ScanResult.Errors
    190   elif stale or missing:
    191     return ScanResult.Warnings
    192   else:
    193     return ScanResult.Ok
    194 
    195 
    196 def _ReadFile(path):
    197   """Reads a file from disk.
    198   Args:
    199     path: The path of the file to read, relative to the root of the repository.
    200   Returns:
    201     The contents of the file as a string.
    202   """
    203 
    204   return open(os.path.join(REPOSITORY_ROOT, path), 'rb').read()
    205 
    206 
    207 def _FindThirdPartyDirs():
    208   """Gets the list of third-party directories.
    209   Returns:
    210     The list of third-party directories.
    211   """
    212 
    213   # Please don't add here paths that have problems with license files,
    214   # as they will end up included in Android WebView snapshot.
    215   # Instead, add them into known_issues.py.
    216   prune_paths = [
    217     # Temporary until we figure out how not to check out quickoffice on the
    218     # Android license check bot. Tracked in crbug.com/350472.
    219     os.path.join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
    220     # Placeholder directory, no third-party code.
    221     os.path.join('third_party', 'adobe'),
    222     # Apache 2.0 license. See
    223     # https://code.google.com/p/chromium/issues/detail?id=140478.
    224     os.path.join('third_party', 'bidichecker'),
    225     # Isn't checked out on clients
    226     os.path.join('third_party', 'gles2_conform'),
    227     # The llvm-build doesn't exist for non-clang builder
    228     os.path.join('third_party', 'llvm-build'),
    229     # Binaries doesn't apply to android
    230     os.path.join('third_party', 'widevine'),
    231     # third_party directories in this tree aren't actually third party, but
    232     # provide a way to shadow experimental buildfiles into those directories.
    233     os.path.join('build', 'secondary'),
    234     # Not shipped, Chromium code
    235     os.path.join('tools', 'swarming_client'),
    236   ]
    237   third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
    238   return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)
    239 
    240 
    241 def _Scan():
    242   """Checks that license meta-data is present for all third-party code and
    243      that all non third-party code doesn't contain external copyrighted code.
    244   Returns:
    245     ScanResult.Ok if everything is in order;
    246     ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
    247       entries)
    248     ScanResult.Errors otherwise.
    249   """
    250 
    251   third_party_dirs = _FindThirdPartyDirs()
    252 
    253   # First, check designated third-party directories using src/tools/licenses.py.
    254   all_licenses_valid = True
    255   for path in sorted(third_party_dirs):
    256     try:
    257       licenses.ParseDir(path, REPOSITORY_ROOT)
    258     except licenses.LicenseError, e:
    259       if not (path in known_issues.KNOWN_ISSUES):
    260         print 'Got LicenseError "%s" while scanning %s' % (e, path)
    261         all_licenses_valid = False
    262 
    263   # Second, check for non-standard license text.
    264   files_data = _ReadFile(os.path.join('android_webview', 'tools',
    265                                       'third_party_files_whitelist.txt'))
    266   whitelisted_files = []
    267   for line in files_data.splitlines():
    268     match = re.match(r'([^#\s]+)', line)
    269     if match:
    270       whitelisted_files.append(match.group(1))
    271   licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)
    272 
    273   return licenses_check if all_licenses_valid else ScanResult.Errors
    274 
    275 
    276 def GenerateNoticeFile():
    277   """Generates the contents of an Android NOTICE file for the third-party code.
    278   This is used by the snapshot tool.
    279   Returns:
    280     The contents of the NOTICE file.
    281   """
    282 
    283   third_party_dirs = _FindThirdPartyDirs()
    284 
    285   # Don't forget Chromium's LICENSE file
    286   content = [_ReadFile('LICENSE')]
    287 
    288   # We provide attribution for all third-party directories.
    289   # TODO(steveblock): Limit this to only code used by the WebView binary.
    290   for directory in sorted(third_party_dirs):
    291     metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
    292                                  require_license_file=False)
    293     license_file = metadata['License File']
    294     if license_file and license_file != licenses.NOT_SHIPPED:
    295       content.append(_ReadFile(license_file))
    296 
    297   return '\n'.join(content)
    298 
    299 
    300 def _ProcessIncompatibleResult(incompatible_directories):
    301   if incompatible_directories:
    302     print ("Incompatibly licensed directories found:\n" +
    303            "\n".join(sorted(incompatible_directories)))
    304     return ScanResult.Errors
    305   return ScanResult.Ok
    306 
    307 def main():
    308   class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    309     def format_description(self, description):
    310       paras = description.split('\n')
    311       formatted_paras = [textwrap.fill(para, self.width) for para in paras]
    312       return '\n'.join(formatted_paras) + '\n'
    313 
    314   parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
    315                                  usage='%prog [options]')
    316   parser.description = (__doc__ +
    317                        '\nCommands:\n' \
    318                        '  scan Check licenses.\n' \
    319                        '  notice Generate Android NOTICE file on stdout.\n' \
    320                        '  incompatible_directories Scan for incompatibly'
    321                        ' licensed directories.\n'
    322                        '  all_incompatible_directories Scan for incompatibly'
    323                        ' licensed directories (even those in'
    324                        ' known_issues.py).\n')
    325   (_, args) = parser.parse_args()
    326   if len(args) != 1:
    327     parser.print_help()
    328     return ScanResult.Errors
    329 
    330   if args[0] == 'scan':
    331     scan_result = _Scan()
    332     if scan_result == ScanResult.Ok:
    333       print 'OK!'
    334     return scan_result
    335   elif args[0] == 'notice':
    336     print GenerateNoticeFile()
    337     return ScanResult.Ok
    338   elif args[0] == 'incompatible_directories':
    339     return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
    340   elif args[0] == 'all_incompatible_directories':
    341     return _ProcessIncompatibleResult(GetIncompatibleDirectories())
    342   parser.print_help()
    343   return ScanResult.Errors
    344 
    345 if __name__ == '__main__':
    346   sys.exit(main())
    347