Home | History | Annotate | Download | only in build
      1 #!/usr/bin/env python
      2 # Copyright 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """A utility script for downloading versioned Syzygy binaries."""
      7 
      8 import cStringIO
      9 import hashlib
     10 import errno
     11 import json
     12 import logging
     13 import optparse
     14 import os
     15 import re
     16 import shutil
     17 import stat
     18 import sys
     19 import subprocess
     20 import urllib2
     21 import zipfile
     22 
     23 
     24 _LOGGER = logging.getLogger(os.path.basename(__file__))
     25 
     26 # The URL where official builds are archived.
     27 _SYZYGY_ARCHIVE_URL = ('http://syzygy-archive.commondatastorage.googleapis.com/'
     28     'builds/official/%(revision)s')
     29 
     30 # A JSON file containing the state of the download directory. If this file and
     31 # directory state do not agree, then the binaries will be downloaded and
     32 # installed again.
     33 _STATE = '.state'
     34 
     35 # This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
     36 # The archive exclusively uses lowercase GIT hashes.
     37 _REVISION_RE = re.compile('^(?:\d+|[a-f0-9]{40})$')
     38 
     39 # This matches an MD5 hash.
     40 _MD5_RE = re.compile('^[a-f0-9]{32}$')
     41 
     42 # List of reources to be downloaded and installed. These are tuples with the
     43 # following format:
     44 # (basename, logging name, relative installation path, extraction filter)
     45 _RESOURCES = [
     46   ('benchmark.zip', 'benchmark', '', None),
     47   ('binaries.zip', 'binaries', 'exe', None),
     48   ('symbols.zip', 'symbols', 'exe',
     49       lambda x: x.filename.endswith('.dll.pdb')),
     50   ('include.zip', 'include', 'include', None),
     51   ('lib.zip', 'library', 'lib', None)]
     52 
     53 
     54 def _Shell(*cmd, **kw):
     55   """Runs |cmd|, returns the results from Popen(cmd).communicate()."""
     56   _LOGGER.debug('Executing %s.', cmd)
     57   prog = subprocess.Popen(cmd, shell=True, **kw)
     58 
     59   stdout, stderr = prog.communicate()
     60   if prog.returncode != 0:
     61     raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
     62   return (stdout, stderr)
     63 
     64 
     65 def _LoadState(output_dir):
     66   """Loads the contents of the state file for a given |output_dir|, returning
     67   None if it doesn't exist.
     68   """
     69   path = os.path.join(output_dir, _STATE)
     70   if not os.path.exists(path):
     71     _LOGGER.debug('No state file found.')
     72     return None
     73   with open(path, 'rb') as f:
     74     _LOGGER.debug('Reading state file: %s', path)
     75     try:
     76       return json.load(f)
     77     except ValueError:
     78       _LOGGER.debug('Invalid state file.')
     79       return None
     80 
     81 
     82 def _SaveState(output_dir, state, dry_run=False):
     83   """Saves the |state| dictionary to the given |output_dir| as a JSON file."""
     84   path = os.path.join(output_dir, _STATE)
     85   _LOGGER.debug('Writing state file: %s', path)
     86   if dry_run:
     87     return
     88   with open(path, 'wb') as f:
     89     f.write(json.dumps(state, sort_keys=True, indent=2))
     90 
     91 
     92 def _Md5(path):
     93   """Returns the MD5 hash of the file at |path|, which must exist."""
     94   return hashlib.md5(open(path, 'rb').read()).hexdigest()
     95 
     96 
     97 def _StateIsValid(state):
     98   """Returns true if the given state structure is valid."""
     99   if not isinstance(state, dict):
    100     _LOGGER.debug('State must be a dict.')
    101     return False
    102   r = state.get('revision', None)
    103   if not isinstance(r, basestring) or not _REVISION_RE.match(r):
    104     _LOGGER.debug('State contains an invalid revision.')
    105     return False
    106   c = state.get('contents', None)
    107   if not isinstance(c, dict):
    108     _LOGGER.debug('State must contain a contents dict.')
    109     return False
    110   for (relpath, md5) in c.iteritems():
    111     if not isinstance(relpath, basestring) or len(relpath) == 0:
    112       _LOGGER.debug('State contents dict contains an invalid path.')
    113       return False
    114     if not isinstance(md5, basestring) or not _MD5_RE.match(md5):
    115       _LOGGER.debug('State contents dict contains an invalid MD5 digest.')
    116       return False
    117   return True
    118 
    119 
    120 def _BuildActualState(stored, revision, output_dir):
    121   """Builds the actual state using the provided |stored| state as a template.
    122   Only examines files listed in the stored state, causing the script to ignore
    123   files that have been added to the directories locally. |stored| must be a
    124   valid state dictionary.
    125   """
    126   contents = {}
    127   state = { 'revision': revision, 'contents': contents }
    128   for relpath, md5 in stored['contents'].iteritems():
    129     abspath = os.path.abspath(os.path.join(output_dir, relpath))
    130     if os.path.isfile(abspath):
    131       m = _Md5(abspath)
    132       contents[relpath] = m
    133 
    134   return state
    135 
    136 
    137 def _StatesAreConsistent(stored, actual):
    138   """Validates whether two state dictionaries are consistent. Both must be valid
    139   state dictionaries. Additional entries in |actual| are ignored.
    140   """
    141   if stored['revision'] != actual['revision']:
    142     _LOGGER.debug('Mismatched revision number.')
    143     return False
    144   cont_stored = stored['contents']
    145   cont_actual = actual['contents']
    146   for relpath, md5 in cont_stored.iteritems():
    147     if relpath not in cont_actual:
    148       _LOGGER.debug('Missing content: %s', relpath)
    149       return False
    150     if md5 != cont_actual[relpath]:
    151       _LOGGER.debug('Modified content: %s', relpath)
    152       return False
    153   return True
    154 
    155 
    156 def _GetCurrentState(revision, output_dir):
    157   """Loads the current state and checks to see if it is consistent. Returns
    158   a tuple (state, bool). The returned state will always be valid, even if an
    159   invalid state is present on disk.
    160   """
    161   stored = _LoadState(output_dir)
    162   if not _StateIsValid(stored):
    163     _LOGGER.debug('State is invalid.')
    164     # Return a valid but empty state.
    165     return ({'revision': '0', 'contents': {}}, False)
    166   actual = _BuildActualState(stored, revision, output_dir)
    167   # If the script has been modified consider the state invalid.
    168   path = os.path.join(output_dir, _STATE)
    169   if os.path.getmtime(__file__) > os.path.getmtime(path):
    170     return (stored, False)
    171   # Otherwise, explicitly validate the state.
    172   if not _StatesAreConsistent(stored, actual):
    173     return (stored, False)
    174   return (stored, True)
    175 
    176 
    177 def _DirIsEmpty(path):
    178   """Returns true if the given directory is empty, false otherwise."""
    179   for root, dirs, files in os.walk(path):
    180     return not dirs and not files
    181 
    182 
    183 def _RmTreeHandleReadOnly(func, path, exc):
    184   """An error handling function for use with shutil.rmtree. This will
    185   detect failures to remove read-only files, and will change their properties
    186   prior to removing them. This is necessary on Windows as os.remove will return
    187   an access error for read-only files, and git repos contain read-only
    188   pack/index files.
    189   """
    190   excvalue = exc[1]
    191   if func in (os.rmdir, os.remove) and excvalue.errno == errno.EACCES:
    192     _LOGGER.debug('Removing read-only path: %s', path)
    193     os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    194     func(path)
    195   else:
    196     raise
    197 
    198 
    199 def _RmTree(path):
    200   """A wrapper of shutil.rmtree that handles read-only files."""
    201   shutil.rmtree(path, ignore_errors=False, onerror=_RmTreeHandleReadOnly)
    202 
    203 
    204 def _CleanState(output_dir, state, dry_run=False):
    205   """Cleans up files/directories in |output_dir| that are referenced by
    206   the given |state|. Raises an error if there are local changes. Returns a
    207   dictionary of files that were deleted.
    208   """
    209   _LOGGER.debug('Deleting files from previous installation.')
    210   deleted = {}
    211 
    212   # Generate a list of files to delete, relative to |output_dir|.
    213   contents = state['contents']
    214   files = sorted(contents.keys())
    215 
    216   # Try to delete the files. Keep track of directories to delete as well.
    217   dirs = {}
    218   for relpath in files:
    219     fullpath = os.path.join(output_dir, relpath)
    220     fulldir = os.path.dirname(fullpath)
    221     dirs[fulldir] = True
    222     if os.path.exists(fullpath):
    223       # If somehow the file has become a directory complain about it.
    224       if os.path.isdir(fullpath):
    225         raise Exception('Directory exists where file expected: %s' % fullpath)
    226 
    227       # Double check that the file doesn't have local changes. If it does
    228       # then refuse to delete it.
    229       if relpath in contents:
    230         stored_md5 = contents[relpath]
    231         actual_md5 = _Md5(fullpath)
    232         if actual_md5 != stored_md5:
    233           raise Exception('File has local changes: %s' % fullpath)
    234 
    235       # The file is unchanged so it can safely be deleted.
    236       _LOGGER.debug('Deleting file "%s".', fullpath)
    237       deleted[relpath] = True
    238       if not dry_run:
    239         os.unlink(fullpath)
    240 
    241   # Sort directories from longest name to shortest. This lets us remove empty
    242   # directories from the most nested paths first.
    243   dirs = sorted(dirs.keys(), key=lambda x: len(x), reverse=True)
    244   for p in dirs:
    245     if os.path.exists(p) and _DirIsEmpty(p):
    246       _LOGGER.debug('Deleting empty directory "%s".', p)
    247       if not dry_run:
    248         _RmTree(p)
    249 
    250   return deleted
    251 
    252 
    253 def _Download(url):
    254   """Downloads the given URL and returns the contents as a string."""
    255   response = urllib2.urlopen(url)
    256   if response.code != 200:
    257     raise RuntimeError('Failed to download "%s".' % url)
    258   return response.read()
    259 
    260 
    261 def _InstallBinaries(options, deleted={}):
    262   """Installs Syzygy binaries. This assumes that the output directory has
    263   already been cleaned, as it will refuse to overwrite existing files."""
    264   contents = {}
    265   state = { 'revision': options.revision, 'contents': contents }
    266   archive_url = _SYZYGY_ARCHIVE_URL % { 'revision': options.revision }
    267   for (base, name, subdir, filt) in _RESOURCES:
    268     # Create the output directory if it doesn't exist.
    269     fulldir = os.path.join(options.output_dir, subdir)
    270     if os.path.isfile(fulldir):
    271       raise Exception('File exists where a directory needs to be created: %s' %
    272                       fulldir)
    273     if not os.path.exists(fulldir):
    274       _LOGGER.debug('Creating directory: %s', fulldir)
    275       if not options.dry_run:
    276         os.makedirs(fulldir)
    277 
    278     # Download the archive.
    279     url = archive_url + '/' + base
    280     _LOGGER.debug('Retrieving %s archive at "%s".', name, url)
    281     data = _Download(url)
    282 
    283     _LOGGER.debug('Unzipping %s archive.', name)
    284     archive = zipfile.ZipFile(cStringIO.StringIO(data))
    285     for entry in archive.infolist():
    286       if not filt or filt(entry):
    287         fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
    288         relpath = os.path.relpath(fullpath, options.output_dir)
    289         if os.path.exists(fullpath):
    290           # If in a dry-run take into account the fact that the file *would*
    291           # have been deleted.
    292           if options.dry_run and relpath in deleted:
    293             pass
    294           else:
    295             raise Exception('Path already exists: %s' % fullpath)
    296 
    297         # Extract the file and update the state dictionary.
    298         _LOGGER.debug('Extracting "%s".', fullpath)
    299         if not options.dry_run:
    300           archive.extract(entry.filename, fulldir)
    301           md5 = _Md5(fullpath)
    302           contents[relpath] = md5
    303           if sys.platform == 'cygwin':
    304             os.chmod(fullpath, os.stat(fullpath).st_mode | stat.S_IXUSR)
    305 
    306   return state
    307 
    308 
    309 def _ParseCommandLine():
    310   """Parses the command-line and returns an options structure."""
    311   option_parser = optparse.OptionParser()
    312   option_parser.add_option('--dry-run', action='store_true', default=False,
    313       help='If true then will simply list actions that would be performed.')
    314   option_parser.add_option('--force', action='store_true', default=False,
    315       help='Force an installation even if the binaries are up to date.')
    316   option_parser.add_option('--output-dir', type='string',
    317       help='The path where the binaries will be replaced. Existing binaries '
    318            'will only be overwritten if not up to date.')
    319   option_parser.add_option('--overwrite', action='store_true', default=False,
    320       help='If specified then the installation will happily delete and rewrite '
    321            'the entire output directory, blasting any local changes.')
    322   option_parser.add_option('--revision', type='string',
    323       help='The SVN revision or GIT hash associated with the required version.')
    324   option_parser.add_option('--revision-file', type='string',
    325       help='A text file containing an SVN revision or GIT hash.')
    326   option_parser.add_option('--verbose', dest='log_level', action='store_const',
    327       default=logging.INFO, const=logging.DEBUG,
    328       help='Enables verbose logging.')
    329   option_parser.add_option('--quiet', dest='log_level', action='store_const',
    330       default=logging.INFO, const=logging.ERROR,
    331       help='Disables all output except for errors.')
    332   options, args = option_parser.parse_args()
    333   if args:
    334     option_parser.error('Unexpected arguments: %s' % args)
    335   if not options.output_dir:
    336     option_parser.error('Must specify --output-dir.')
    337   if not options.revision and not options.revision_file:
    338     option_parser.error('Must specify one of --revision or --revision-file.')
    339   if options.revision and options.revision_file:
    340     option_parser.error('Must not specify both --revision and --revision-file.')
    341 
    342   # Configure logging.
    343   logging.basicConfig(level=options.log_level)
    344 
    345   # If a revision file has been specified then read it.
    346   if options.revision_file:
    347     options.revision = open(options.revision_file, 'rb').read().strip()
    348     _LOGGER.debug('Parsed revision "%s" from file "%s".',
    349                  options.revision, options.revision_file)
    350 
    351   # Ensure that the specified SVN revision or GIT hash is valid.
    352   if not _REVISION_RE.match(options.revision):
    353     option_parser.error('Must specify a valid SVN or GIT revision.')
    354 
    355   # This just makes output prettier to read.
    356   options.output_dir = os.path.normpath(options.output_dir)
    357 
    358   return options
    359 
    360 
    361 def _RemoveOrphanedFiles(options):
    362   """This is run on non-Windows systems to remove orphaned files that may have
    363   been downloaded by a previous version of this script.
    364   """
    365   # Reconfigure logging to output info messages. This will allow inspection of
    366   # cleanup status on non-Windows buildbots.
    367   _LOGGER.setLevel(logging.INFO)
    368 
    369   output_dir = os.path.abspath(options.output_dir)
    370 
    371   # We only want to clean up the folder in 'src/third_party/syzygy', and we
    372   # expect to be called with that as an output directory. This is an attempt to
    373   # not start deleting random things if the script is run from an alternate
    374   # location, or not called from the gclient hooks.
    375   expected_syzygy_dir = os.path.abspath(os.path.join(
    376       os.path.dirname(__file__), '..', 'third_party', 'syzygy'))
    377   expected_output_dir = os.path.join(expected_syzygy_dir, 'binaries')
    378   if expected_output_dir != output_dir:
    379     _LOGGER.info('Unexpected output directory, skipping cleanup.')
    380     return
    381 
    382   if not os.path.isdir(expected_syzygy_dir):
    383     _LOGGER.info('Output directory does not exist, skipping cleanup.')
    384     return
    385 
    386   def OnError(function, path, excinfo):
    387     """Logs error encountered by shutil.rmtree."""
    388     _LOGGER.error('Error when running %s(%s)', function, path, exc_info=excinfo)
    389 
    390   _LOGGER.info('Removing orphaned files from %s', expected_syzygy_dir)
    391   if not options.dry_run:
    392     shutil.rmtree(expected_syzygy_dir, True, OnError)
    393 
    394 
    395 def main():
    396   options = _ParseCommandLine()
    397 
    398   if options.dry_run:
    399     _LOGGER.debug('Performing a dry-run.')
    400 
    401   # We only care about Windows platforms, as the Syzygy binaries aren't used
    402   # elsewhere. However, there was a short period of time where this script
    403   # wasn't gated on OS types, and those OSes downloaded and installed binaries.
    404   # This will cleanup orphaned files on those operating systems.
    405   if sys.platform not in ('win32', 'cygwin'):
    406     return _RemoveOrphanedFiles(options)
    407 
    408   # Load the current installation state, and validate it against the
    409   # requested installation.
    410   state, is_consistent = _GetCurrentState(options.revision, options.output_dir)
    411 
    412   # Decide whether or not an install is necessary.
    413   if options.force:
    414     _LOGGER.debug('Forcing reinstall of binaries.')
    415   elif is_consistent:
    416     # Avoid doing any work if the contents of the directory are consistent.
    417     _LOGGER.debug('State unchanged, no reinstall necessary.')
    418     return
    419 
    420   # Under normal logging this is the only only message that will be reported.
    421   _LOGGER.info('Installing revision %s Syzygy binaries.',
    422                options.revision[0:12])
    423 
    424   # Clean up the old state to begin with.
    425   deleted = []
    426   if options.overwrite:
    427     if os.path.exists(options.output_dir):
    428       # If overwrite was specified then take a heavy-handed approach.
    429       _LOGGER.debug('Deleting entire installation directory.')
    430       if not options.dry_run:
    431         _RmTree(options.output_dir)
    432   else:
    433     # Otherwise only delete things that the previous installation put in place,
    434     # and take care to preserve any local changes.
    435     deleted = _CleanState(options.output_dir, state, options.dry_run)
    436 
    437   # Install the new binaries. In a dry-run this will actually download the
    438   # archives, but it won't write anything to disk.
    439   state = _InstallBinaries(options, deleted)
    440 
    441   # Build and save the state for the directory.
    442   _SaveState(options.output_dir, state, options.dry_run)
    443 
    444 
# Run the installer only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
    447