Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Checks third-party licenses for the purposes of the Android WebView build.
      7 
      8 The Android tree includes a snapshot of Chromium in order to power the system
      9 WebView.  This tool checks that all code uses open-source licenses compatible
     10 with Android, and that we meet the requirements of those licenses. It can also
     11 be used to generate an Android NOTICE file for the third-party code.
     12 
     13 It makes use of src/tools/licenses.py and the README.chromium files on which
     14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
     15 which whitelists individual files which contain third-party code but which
     16 aren't in a third-party directory with a README.chromium file.
     17 """
     18 
     19 import optparse
     20 import os
     21 import re
     22 import subprocess
     23 import sys
     24 import textwrap
     25 
     26 
# Absolute path of the repository root, i.e. the directory two levels above
# the directory that contains this script.
REPOSITORY_ROOT = os.path.abspath(os.path.join(
    os.path.dirname(__file__), '..', '..'))

# Make modules in <REPOSITORY_ROOT>/tools importable; this has to run before
# the imports that follow.
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
     31 import licenses
     32 
     33 import known_issues
     34 
     35 def GetIncompatibleDirectories():
     36   """Gets a list of third-party directories which use licenses incompatible
     37   with Android. This is used by the snapshot tool.
     38   Returns:
     39     A list of directories.
     40   """
     41 
     42   whitelist = [
     43     'Apache( Version)? 2(\.0)?',
     44     '(New )?([23]-Clause )?BSD( [23]-Clause)?( with advertising clause)?',
     45     'L?GPL ?v?2(\.[01])?( or later)?',
     46     'MIT(/X11)?(-like)?',
     47     'MPL 1\.1 ?/ ?GPL 2(\.0)? ?/ ?LGPL 2\.1',
     48     'MPL 2(\.0)?',
     49     'Microsoft Limited Public License',
     50     'Microsoft Permissive License',
     51     'Public Domain',
     52     'SGI Free Software License B',
     53     'X11',
     54   ]
     55   regex = '^(%s)$' % '|'.join(whitelist)
     56   result = []
     57   for directory in _FindThirdPartyDirs():
     58     if directory in known_issues.KNOWN_ISSUES:
     59       result.append(directory)
     60       continue
     61     try:
     62       metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
     63                                    require_license_file=False)
     64     except licenses.LicenseError as e:
     65       print 'Got LicenseError while scanning ' + directory
     66       raise
     67     if metadata.get('License Android Compatible', 'no').upper() == 'YES':
     68       continue
     69     license = re.split(' [Ll]icenses?$', metadata['License'])[0]
     70     tokens = [x.strip() for x in re.split(' and |,', license) if len(x) > 0]
     71     for token in tokens:
     72       if not re.match(regex, token, re.IGNORECASE):
     73         result.append(directory)
     74         break
     75   return result
     76 
     77 class ScanResult(object):
     78   Ok, Warnings, Errors = range(3)
     79 
     80 def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
     81   """Checks that all files which are not in a listed third-party directory,
     82   and which do not use the standard Chromium license, are whitelisted.
     83   Args:
     84     excluded_dirs_list: The list of directories to exclude from scanning.
     85     whitelisted_files: The whitelist of files.
     86   Returns:
     87     ScanResult.Ok if all files with non-standard license headers are whitelisted
     88     and the whitelist contains no stale entries;
     89     ScanResult.Warnings if there are stale entries;
     90     ScanResult.Errors if new non-whitelisted entries found.
     91   """
     92 
     93   excluded_dirs_list = [d for d in excluded_dirs_list if not 'third_party' in d]
     94   # Using a common pattern for third-partyies makes the ignore regexp shorter
     95   excluded_dirs_list.append('third_party')
     96   # VCS dirs
     97   excluded_dirs_list.append('.git')
     98   excluded_dirs_list.append('.svn')
     99   # Build output
    100   excluded_dirs_list.append('out/Debug')
    101   excluded_dirs_list.append('out/Release')
    102   # 'Copyright' appears in license agreements
    103   excluded_dirs_list.append('chrome/app/resources')
    104   # This is a test output directory
    105   excluded_dirs_list.append('chrome/tools/test/reference_build')
    106   # This is tests directory, doesn't exist in the snapshot
    107   excluded_dirs_list.append('content/test/data')
    108   # This is a tests directory that doesn't exist in the shipped product.
    109   excluded_dirs_list.append('gin/test')
    110   # This is a test output directory
    111   excluded_dirs_list.append('data/dom_perf')
    112   # Histogram tools, doesn't exist in the snapshot
    113   excluded_dirs_list.append('tools/histograms')
    114   # Swarming tools, doesn't exist in the snapshot
    115   excluded_dirs_list.append('tools/swarming_client')
    116   # Arm sysroot tools, doesn't exist in the snapshot
    117   excluded_dirs_list.append('arm-sysroot')
    118   # Data is not part of open source chromium, but are included on some bots.
    119   excluded_dirs_list.append('data')
    120 
    121   args = ['android_webview/tools/find_copyrights.pl',
    122           '.'
    123           ] + excluded_dirs_list
    124   p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
    125   lines = p.communicate()[0].splitlines()
    126 
    127   offending_files = []
    128   allowed_copyrights = '^(?:\*No copyright\*' \
    129       '|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. ' \
    130       'All rights reserved.*)$'
    131   allowed_copyrights_re = re.compile(allowed_copyrights)
    132   for l in lines:
    133     entries = l.split('\t')
    134     if entries[1] == "GENERATED FILE":
    135       continue
    136     copyrights = entries[1].split(' / ')
    137     for c in copyrights:
    138       if c and not allowed_copyrights_re.match(c):
    139         offending_files.append(os.path.normpath(entries[0]))
    140         break
    141 
    142   unknown = set(offending_files) - set(whitelisted_files)
    143   if unknown:
    144     print 'The following files contain a third-party license but are not in ' \
    145           'a listed third-party directory and are not whitelisted. You must ' \
    146           'add the following files to the whitelist.\n%s' % \
    147           '\n'.join(sorted(unknown))
    148 
    149   stale = set(whitelisted_files) - set(offending_files)
    150   if stale:
    151     print 'The following files are whitelisted unnecessarily. You must ' \
    152           ' remove the following files from the whitelist.\n%s' % \
    153           '\n'.join(sorted(stale))
    154 
    155   if unknown:
    156     return ScanResult.Errors
    157   elif stale:
    158     return ScanResult.Warnings
    159   else:
    160     return ScanResult.Ok
    161 
    162 
    163 def _ReadFile(path):
    164   """Reads a file from disk.
    165   Args:
    166     path: The path of the file to read, relative to the root of the repository.
    167   Returns:
    168     The contents of the file as a string.
    169   """
    170 
    171   return open(os.path.join(REPOSITORY_ROOT, path), 'rb').read()
    172 
    173 
    174 def _FindThirdPartyDirs():
    175   """Gets the list of third-party directories.
    176   Returns:
    177     The list of third-party directories.
    178   """
    179 
    180   # Please don't add here paths that have problems with license files,
    181   # as they will end up included in Android WebView snapshot.
    182   # Instead, add them into known_issues.py.
    183   prune_paths = [
    184     # Placeholder directory, no third-party code.
    185     os.path.join('third_party', 'adobe'),
    186     # Apache 2.0 license. See
    187     # https://code.google.com/p/chromium/issues/detail?id=140478.
    188     os.path.join('third_party', 'bidichecker'),
    189     # Isn't checked out on clients
    190     os.path.join('third_party', 'gles2_conform'),
    191     # The llvm-build doesn't exist for non-clang builder
    192     os.path.join('third_party', 'llvm-build'),
    193     # Binaries doesn't apply to android
    194     os.path.join('third_party', 'widevine'),
    195     # third_party directories in this tree aren't actually third party, but
    196     # provide a way to shadow experimental buildfiles into those directories.
    197     os.path.join('tools', 'gn', 'secondary'),
    198     # Not shipped, Chromium code
    199     os.path.join('tools', 'swarming_client'),
    200   ]
    201   third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
    202   return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)
    203 
    204 
    205 def _Scan():
    206   """Checks that license meta-data is present for all third-party code and
    207      that all non third-party code doesn't contain external copyrighted code.
    208   Returns:
    209     ScanResult.Ok if everything is in order;
    210     ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
    211       entries)
    212     ScanResult.Errors otherwise.
    213   """
    214 
    215   third_party_dirs = _FindThirdPartyDirs()
    216 
    217   # First, check designated third-party directories using src/tools/licenses.py.
    218   all_licenses_valid = True
    219   for path in sorted(third_party_dirs):
    220     try:
    221       licenses.ParseDir(path, REPOSITORY_ROOT)
    222     except licenses.LicenseError, e:
    223       if not (path in known_issues.KNOWN_ISSUES):
    224         print 'Got LicenseError "%s" while scanning %s' % (e, path)
    225         all_licenses_valid = False
    226 
    227   # Second, check for non-standard license text.
    228   files_data = _ReadFile(os.path.join('android_webview', 'tools',
    229                                       'third_party_files_whitelist.txt'))
    230   whitelisted_files = []
    231   for line in files_data.splitlines():
    232     match = re.match(r'([^#\s]+)', line)
    233     if match:
    234       whitelisted_files.append(match.group(1))
    235   licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)
    236 
    237   return licenses_check if all_licenses_valid else ScanResult.Errors
    238 
    239 
    240 def GenerateNoticeFile():
    241   """Generates the contents of an Android NOTICE file for the third-party code.
    242   This is used by the snapshot tool.
    243   Returns:
    244     The contents of the NOTICE file.
    245   """
    246 
    247   third_party_dirs = _FindThirdPartyDirs()
    248 
    249   # Don't forget Chromium's LICENSE file
    250   content = [_ReadFile('LICENSE')]
    251 
    252   # We provide attribution for all third-party directories.
    253   # TODO(steveblock): Limit this to only code used by the WebView binary.
    254   for directory in sorted(third_party_dirs):
    255     metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
    256                                  require_license_file=False)
    257     license_file = metadata['License File']
    258     if license_file and license_file != licenses.NOT_SHIPPED:
    259       content.append(_ReadFile(license_file))
    260 
    261   return '\n'.join(content)
    262 
    263 
    264 def main():
    265   class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    266     def format_description(self, description):
    267       paras = description.split('\n')
    268       formatted_paras = [textwrap.fill(para, self.width) for para in paras]
    269       return '\n'.join(formatted_paras) + '\n'
    270 
    271   parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
    272                                  usage='%prog [options]')
    273   parser.description = (__doc__ +
    274                        '\nCommands:\n' \
    275                        '  scan  Check licenses.\n' \
    276                        '  notice Generate Android NOTICE file on stdout')
    277   (options, args) = parser.parse_args()
    278   if len(args) != 1:
    279     parser.print_help()
    280     return ScanResult.Errors
    281 
    282   if args[0] == 'scan':
    283     scan_result = _Scan()
    284     if scan_result == ScanResult.Ok:
    285       print 'OK!'
    286     return scan_result
    287   elif args[0] == 'notice':
    288     print GenerateNoticeFile()
    289     return ScanResult.Ok
    290 
    291   parser.print_help()
    292   return ScanResult.Errors
    293 
    294 if __name__ == '__main__':
    295   sys.exit(main())
    296