Home | History | Annotate | Download | only in build
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """A utility script for downloading versioned Syzygy binaries."""
      7 
      8 import hashlib
      9 import errno
     10 import json
     11 import logging
     12 import optparse
     13 import os
     14 import re
     15 import shutil
     16 import stat
     17 import sys
     18 import subprocess
     19 import tempfile
     20 import time
     21 import zipfile
     22 
     23 
     24 _LOGGER = logging.getLogger(os.path.basename(__file__))
     25 
     26 # The relative path where official builds are archived in their GS bucket.
     27 _SYZYGY_ARCHIVE_PATH = ('/builds/official/%(revision)s')
     28 
     29 # A JSON file containing the state of the download directory. If this file and
     30 # directory state do not agree, then the binaries will be downloaded and
     31 # installed again.
     32 _STATE = '.state'
     33 
     34 # This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
     35 # The archive exclusively uses lowercase GIT hashes.
     36 _REVISION_RE = re.compile('^(?:\d+|[a-f0-9]{40})$')
     37 
     38 # This matches an MD5 hash.
     39 _MD5_RE = re.compile('^[a-f0-9]{32}$')
     40 
     41 # List of reources to be downloaded and installed. These are tuples with the
     42 # following format:
     43 # (basename, logging name, relative installation path, extraction filter)
     44 _RESOURCES = [
     45   ('benchmark.zip', 'benchmark', '', None),
     46   ('binaries.zip', 'binaries', 'exe', None),
     47   ('symbols.zip', 'symbols', 'exe',
     48       lambda x: x.filename.endswith('.dll.pdb'))]
     49 
     50 
     51 def _LoadState(output_dir):
     52   """Loads the contents of the state file for a given |output_dir|, returning
     53   None if it doesn't exist.
     54   """
     55   path = os.path.join(output_dir, _STATE)
     56   if not os.path.exists(path):
     57     _LOGGER.debug('No state file found.')
     58     return None
     59   with open(path, 'rb') as f:
     60     _LOGGER.debug('Reading state file: %s', path)
     61     try:
     62       return json.load(f)
     63     except ValueError:
     64       _LOGGER.debug('Invalid state file.')
     65       return None
     66 
     67 
     68 def _SaveState(output_dir, state, dry_run=False):
     69   """Saves the |state| dictionary to the given |output_dir| as a JSON file."""
     70   path = os.path.join(output_dir, _STATE)
     71   _LOGGER.debug('Writing state file: %s', path)
     72   if dry_run:
     73     return
     74   with open(path, 'wb') as f:
     75     f.write(json.dumps(state, sort_keys=True, indent=2))
     76 
     77 
     78 def _Md5(path):
     79   """Returns the MD5 hash of the file at |path|, which must exist."""
     80   return hashlib.md5(open(path, 'rb').read()).hexdigest()
     81 
     82 
     83 def _StateIsValid(state):
     84   """Returns true if the given state structure is valid."""
     85   if not isinstance(state, dict):
     86     _LOGGER.debug('State must be a dict.')
     87     return False
     88   r = state.get('revision', None)
     89   if not isinstance(r, basestring) or not _REVISION_RE.match(r):
     90     _LOGGER.debug('State contains an invalid revision.')
     91     return False
     92   c = state.get('contents', None)
     93   if not isinstance(c, dict):
     94     _LOGGER.debug('State must contain a contents dict.')
     95     return False
     96   for (relpath, md5) in c.iteritems():
     97     if not isinstance(relpath, basestring) or len(relpath) == 0:
     98       _LOGGER.debug('State contents dict contains an invalid path.')
     99       return False
    100     if not isinstance(md5, basestring) or not _MD5_RE.match(md5):
    101       _LOGGER.debug('State contents dict contains an invalid MD5 digest.')
    102       return False
    103   return True
    104 
    105 
    106 def _BuildActualState(stored, revision, output_dir):
    107   """Builds the actual state using the provided |stored| state as a template.
    108   Only examines files listed in the stored state, causing the script to ignore
    109   files that have been added to the directories locally. |stored| must be a
    110   valid state dictionary.
    111   """
    112   contents = {}
    113   state = { 'revision': revision, 'contents': contents }
    114   for relpath, md5 in stored['contents'].iteritems():
    115     abspath = os.path.abspath(os.path.join(output_dir, relpath))
    116     if os.path.isfile(abspath):
    117       m = _Md5(abspath)
    118       contents[relpath] = m
    119 
    120   return state
    121 
    122 
    123 def _StatesAreConsistent(stored, actual):
    124   """Validates whether two state dictionaries are consistent. Both must be valid
    125   state dictionaries. Additional entries in |actual| are ignored.
    126   """
    127   if stored['revision'] != actual['revision']:
    128     _LOGGER.debug('Mismatched revision number.')
    129     return False
    130   cont_stored = stored['contents']
    131   cont_actual = actual['contents']
    132   for relpath, md5 in cont_stored.iteritems():
    133     if relpath not in cont_actual:
    134       _LOGGER.debug('Missing content: %s', relpath)
    135       return False
    136     if md5 != cont_actual[relpath]:
    137       _LOGGER.debug('Modified content: %s', relpath)
    138       return False
    139   return True
    140 
    141 
    142 def _GetCurrentState(revision, output_dir):
    143   """Loads the current state and checks to see if it is consistent. Returns
    144   a tuple (state, bool). The returned state will always be valid, even if an
    145   invalid state is present on disk.
    146   """
    147   stored = _LoadState(output_dir)
    148   if not _StateIsValid(stored):
    149     _LOGGER.debug('State is invalid.')
    150     # Return a valid but empty state.
    151     return ({'revision': '0', 'contents': {}}, False)
    152   actual = _BuildActualState(stored, revision, output_dir)
    153   # If the script has been modified consider the state invalid.
    154   path = os.path.join(output_dir, _STATE)
    155   if os.path.getmtime(__file__) > os.path.getmtime(path):
    156     return (stored, False)
    157   # Otherwise, explicitly validate the state.
    158   if not _StatesAreConsistent(stored, actual):
    159     return (stored, False)
    160   return (stored, True)
    161 
    162 
    163 def _DirIsEmpty(path):
    164   """Returns true if the given directory is empty, false otherwise."""
    165   for root, dirs, files in os.walk(path):
    166     return not dirs and not files
    167 
    168 
    169 def _RmTreeHandleReadOnly(func, path, exc):
    170   """An error handling function for use with shutil.rmtree. This will
    171   detect failures to remove read-only files, and will change their properties
    172   prior to removing them. This is necessary on Windows as os.remove will return
    173   an access error for read-only files, and git repos contain read-only
    174   pack/index files.
    175   """
    176   excvalue = exc[1]
    177   if func in (os.rmdir, os.remove) and excvalue.errno == errno.EACCES:
    178     _LOGGER.debug('Removing read-only path: %s', path)
    179     os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    180     func(path)
    181   else:
    182     raise
    183 
    184 
    185 def _RmTree(path):
    186   """A wrapper of shutil.rmtree that handles read-only files."""
    187   shutil.rmtree(path, ignore_errors=False, onerror=_RmTreeHandleReadOnly)
    188 
    189 
    190 def _CleanState(output_dir, state, dry_run=False):
    191   """Cleans up files/directories in |output_dir| that are referenced by
    192   the given |state|. Raises an error if there are local changes. Returns a
    193   dictionary of files that were deleted.
    194   """
    195   _LOGGER.debug('Deleting files from previous installation.')
    196   deleted = {}
    197 
    198   # Generate a list of files to delete, relative to |output_dir|.
    199   contents = state['contents']
    200   files = sorted(contents.keys())
    201 
    202   # Try to delete the files. Keep track of directories to delete as well.
    203   dirs = {}
    204   for relpath in files:
    205     fullpath = os.path.join(output_dir, relpath)
    206     fulldir = os.path.dirname(fullpath)
    207     dirs[fulldir] = True
    208     if os.path.exists(fullpath):
    209       # If somehow the file has become a directory complain about it.
    210       if os.path.isdir(fullpath):
    211         raise Exception('Directory exists where file expected: %s' % fullpath)
    212 
    213       # Double check that the file doesn't have local changes. If it does
    214       # then refuse to delete it.
    215       if relpath in contents:
    216         stored_md5 = contents[relpath]
    217         actual_md5 = _Md5(fullpath)
    218         if actual_md5 != stored_md5:
    219           raise Exception('File has local changes: %s' % fullpath)
    220 
    221       # The file is unchanged so it can safely be deleted.
    222       _LOGGER.debug('Deleting file "%s".', fullpath)
    223       deleted[relpath] = True
    224       if not dry_run:
    225         os.unlink(fullpath)
    226 
    227   # Sort directories from longest name to shortest. This lets us remove empty
    228   # directories from the most nested paths first.
    229   dirs = sorted(dirs.keys(), key=lambda x: len(x), reverse=True)
    230   for p in dirs:
    231     if os.path.exists(p) and _DirIsEmpty(p):
    232       _LOGGER.debug('Deleting empty directory "%s".', p)
    233       if not dry_run:
    234         _RmTree(p)
    235 
    236   return deleted
    237 
    238 
    239 def _FindGsUtil():
    240   """Looks for depot_tools and returns the absolute path to gsutil.py."""
    241   for path in os.environ['PATH'].split(os.pathsep):
    242     path = os.path.abspath(path)
    243     git_cl = os.path.join(path, 'git_cl.py')
    244     gs_util = os.path.join(path, 'gsutil.py')
    245     if os.path.exists(git_cl) and os.path.exists(gs_util):
    246       return gs_util
    247   return None
    248 
    249 
    250 def _GsUtil(*cmd):
    251   """Runs the given command in gsutil with exponential backoff and retries."""
    252   gs_util = _FindGsUtil()
    253   cmd = [sys.executable, gs_util] + list(cmd)
    254 
    255   retries = 3
    256   timeout = 4  # Seconds.
    257   while True:
    258     _LOGGER.debug('Running %s', cmd)
    259     prog = subprocess.Popen(cmd, shell=False)
    260     prog.communicate()
    261 
    262     # Stop retrying on success.
    263     if prog.returncode == 0:
    264       return
    265 
    266     # Raise a permanent failure if retries have been exhausted.
    267     if retries == 0:
    268       raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
    269 
    270     _LOGGER.debug('Sleeping %d seconds and trying again.', timeout)
    271     time.sleep(timeout)
    272     retries -= 1
    273     timeout *= 2
    274 
    275 
    276 def _Download(resource):
    277   """Downloads the given GS resource to a temporary file, returning its path."""
    278   tmp = tempfile.mkstemp(suffix='syzygy_archive')
    279   os.close(tmp[0])
    280   url = 'gs://syzygy-archive' + resource
    281   _GsUtil('cp', url, tmp[1])
    282   return tmp[1]
    283 
    284 
    285 def _InstallBinaries(options, deleted={}):
    286   """Installs Syzygy binaries. This assumes that the output directory has
    287   already been cleaned, as it will refuse to overwrite existing files."""
    288   contents = {}
    289   state = { 'revision': options.revision, 'contents': contents }
    290   archive_path = _SYZYGY_ARCHIVE_PATH % { 'revision': options.revision }
    291   if options.resources:
    292     resources = [(resource, resource, '', None)
    293                  for resource in options.resources]
    294   else:
    295     resources = _RESOURCES
    296   for (base, name, subdir, filt) in resources:
    297     # Create the output directory if it doesn't exist.
    298     fulldir = os.path.join(options.output_dir, subdir)
    299     if os.path.isfile(fulldir):
    300       raise Exception('File exists where a directory needs to be created: %s' %
    301                       fulldir)
    302     if not os.path.exists(fulldir):
    303       _LOGGER.debug('Creating directory: %s', fulldir)
    304       if not options.dry_run:
    305         os.makedirs(fulldir)
    306 
    307     # Download and read the archive.
    308     resource = archive_path + '/' + base
    309     _LOGGER.debug('Retrieving %s archive at "%s".', name, resource)
    310     path = _Download(resource)
    311 
    312     _LOGGER.debug('Unzipping %s archive.', name)
    313     with open(path, 'rb') as data:
    314       archive = zipfile.ZipFile(data)
    315       for entry in archive.infolist():
    316         if not filt or filt(entry):
    317           fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
    318           relpath = os.path.relpath(fullpath, options.output_dir)
    319           if os.path.exists(fullpath):
    320             # If in a dry-run take into account the fact that the file *would*
    321             # have been deleted.
    322             if options.dry_run and relpath in deleted:
    323               pass
    324             else:
    325               raise Exception('Path already exists: %s' % fullpath)
    326 
    327           # Extract the file and update the state dictionary.
    328           _LOGGER.debug('Extracting "%s".', fullpath)
    329           if not options.dry_run:
    330             archive.extract(entry.filename, fulldir)
    331             md5 = _Md5(fullpath)
    332             contents[relpath] = md5
    333             if sys.platform == 'cygwin':
    334               os.chmod(fullpath, os.stat(fullpath).st_mode | stat.S_IXUSR)
    335 
    336     _LOGGER.debug('Removing temporary file "%s".', path)
    337     os.remove(path)
    338 
    339   return state
    340 
    341 
    342 def _ParseCommandLine():
    343   """Parses the command-line and returns an options structure."""
    344   option_parser = optparse.OptionParser()
    345   option_parser.add_option('--dry-run', action='store_true', default=False,
    346       help='If true then will simply list actions that would be performed.')
    347   option_parser.add_option('--force', action='store_true', default=False,
    348       help='Force an installation even if the binaries are up to date.')
    349   option_parser.add_option('--no-cleanup', action='store_true', default=False,
    350       help='Allow installation on non-Windows platforms, and skip the forced '
    351            'cleanup step.')
    352   option_parser.add_option('--output-dir', type='string',
    353       help='The path where the binaries will be replaced. Existing binaries '
    354            'will only be overwritten if not up to date.')
    355   option_parser.add_option('--overwrite', action='store_true', default=False,
    356       help='If specified then the installation will happily delete and rewrite '
    357            'the entire output directory, blasting any local changes.')
    358   option_parser.add_option('--revision', type='string',
    359       help='The SVN revision or GIT hash associated with the required version.')
    360   option_parser.add_option('--revision-file', type='string',
    361       help='A text file containing an SVN revision or GIT hash.')
    362   option_parser.add_option('--resource', type='string', action='append',
    363       dest='resources', help='A resource to be downloaded.')
    364   option_parser.add_option('--verbose', dest='log_level', action='store_const',
    365       default=logging.INFO, const=logging.DEBUG,
    366       help='Enables verbose logging.')
    367   option_parser.add_option('--quiet', dest='log_level', action='store_const',
    368       default=logging.INFO, const=logging.ERROR,
    369       help='Disables all output except for errors.')
    370   options, args = option_parser.parse_args()
    371   if args:
    372     option_parser.error('Unexpected arguments: %s' % args)
    373   if not options.output_dir:
    374     option_parser.error('Must specify --output-dir.')
    375   if not options.revision and not options.revision_file:
    376     option_parser.error('Must specify one of --revision or --revision-file.')
    377   if options.revision and options.revision_file:
    378     option_parser.error('Must not specify both --revision and --revision-file.')
    379 
    380   # Configure logging.
    381   logging.basicConfig(level=options.log_level)
    382 
    383   # If a revision file has been specified then read it.
    384   if options.revision_file:
    385     options.revision = open(options.revision_file, 'rb').read().strip()
    386     _LOGGER.debug('Parsed revision "%s" from file "%s".',
    387                  options.revision, options.revision_file)
    388 
    389   # Ensure that the specified SVN revision or GIT hash is valid.
    390   if not _REVISION_RE.match(options.revision):
    391     option_parser.error('Must specify a valid SVN or GIT revision.')
    392 
    393   # This just makes output prettier to read.
    394   options.output_dir = os.path.normpath(options.output_dir)
    395 
    396   return options
    397 
    398 
    399 def _RemoveOrphanedFiles(options):
    400   """This is run on non-Windows systems to remove orphaned files that may have
    401   been downloaded by a previous version of this script.
    402   """
    403   # Reconfigure logging to output info messages. This will allow inspection of
    404   # cleanup status on non-Windows buildbots.
    405   _LOGGER.setLevel(logging.INFO)
    406 
    407   output_dir = os.path.abspath(options.output_dir)
    408 
    409   # We only want to clean up the folder in 'src/third_party/syzygy', and we
    410   # expect to be called with that as an output directory. This is an attempt to
    411   # not start deleting random things if the script is run from an alternate
    412   # location, or not called from the gclient hooks.
    413   expected_syzygy_dir = os.path.abspath(os.path.join(
    414       os.path.dirname(__file__), '..', 'third_party', 'syzygy'))
    415   expected_output_dir = os.path.join(expected_syzygy_dir, 'binaries')
    416   if expected_output_dir != output_dir:
    417     _LOGGER.info('Unexpected output directory, skipping cleanup.')
    418     return
    419 
    420   if not os.path.isdir(expected_syzygy_dir):
    421     _LOGGER.info('Output directory does not exist, skipping cleanup.')
    422     return
    423 
    424   def OnError(function, path, excinfo):
    425     """Logs error encountered by shutil.rmtree."""
    426     _LOGGER.error('Error when running %s(%s)', function, path, exc_info=excinfo)
    427 
    428   _LOGGER.info('Removing orphaned files from %s', expected_syzygy_dir)
    429   if not options.dry_run:
    430     shutil.rmtree(expected_syzygy_dir, True, OnError)
    431 
    432 
    433 def main():
    434   options = _ParseCommandLine()
    435 
    436   if options.dry_run:
    437     _LOGGER.debug('Performing a dry-run.')
    438 
    439   # We only care about Windows platforms, as the Syzygy binaries aren't used
    440   # elsewhere. However, there was a short period of time where this script
    441   # wasn't gated on OS types, and those OSes downloaded and installed binaries.
    442   # This will cleanup orphaned files on those operating systems.
    443   if sys.platform not in ('win32', 'cygwin'):
    444     if options.no_cleanup:
    445       _LOGGER.debug('Skipping usual cleanup for non-Windows platforms.')
    446     else:
    447       return _RemoveOrphanedFiles(options)
    448 
    449   # Load the current installation state, and validate it against the
    450   # requested installation.
    451   state, is_consistent = _GetCurrentState(options.revision, options.output_dir)
    452 
    453   # Decide whether or not an install is necessary.
    454   if options.force:
    455     _LOGGER.debug('Forcing reinstall of binaries.')
    456   elif is_consistent:
    457     # Avoid doing any work if the contents of the directory are consistent.
    458     _LOGGER.debug('State unchanged, no reinstall necessary.')
    459     return
    460 
    461   # Under normal logging this is the only only message that will be reported.
    462   _LOGGER.info('Installing revision %s Syzygy binaries.',
    463                options.revision[0:12])
    464 
    465   # Clean up the old state to begin with.
    466   deleted = []
    467   if options.overwrite:
    468     if os.path.exists(options.output_dir):
    469       # If overwrite was specified then take a heavy-handed approach.
    470       _LOGGER.debug('Deleting entire installation directory.')
    471       if not options.dry_run:
    472         _RmTree(options.output_dir)
    473   else:
    474     # Otherwise only delete things that the previous installation put in place,
    475     # and take care to preserve any local changes.
    476     deleted = _CleanState(options.output_dir, state, options.dry_run)
    477 
    478   # Install the new binaries. In a dry-run this will actually download the
    479   # archives, but it won't write anything to disk.
    480   state = _InstallBinaries(options, deleted)
    481 
    482   # Build and save the state for the directory.
    483   _SaveState(options.output_dir, state, options.dry_run)
    484 
    485 
# Run the installer only when this file is executed as a script.
if __name__ == '__main__':
  main()
    488