#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A utility script for downloading versioned Syzygy binaries."""

import contextlib
import errno
import hashlib
import io
import json
import logging
import optparse
import os
import re
import shutil
import stat
import subprocess
import sys
import zipfile

try:
  import urllib2  # Python 2.
except ImportError:
  import urllib.request as urllib2  # Python 3.

try:
  _STRING_TYPES = (basestring,)  # Python 2: covers both str and unicode.
except NameError:
  _STRING_TYPES = (str,)  # Python 3.


_LOGGER = logging.getLogger(os.path.basename(__file__))

# The URL where official builds are archived.
# NOTE(review): the archives are fetched over plain HTTP and are not checked
# against any published digest before being unpacked; consider switching to
# HTTPS if the archive host supports it.
_SYZYGY_ARCHIVE_URL = ('http://syzygy-archive.commondatastorage.googleapis.com/'
                       'builds/official/%(revision)s')

# A JSON file containing the state of the download directory. If this file and
# directory state do not agree, then the binaries will be downloaded and
# installed again.
_STATE = '.state'

# This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
# The archive exclusively uses lowercase GIT hashes.
_REVISION_RE = re.compile(r'^(?:\d+|[a-f0-9]{40})$')

# This matches an MD5 hash.
_MD5_RE = re.compile(r'^[a-f0-9]{32}$')

# Number of bytes read at a time when hashing a file.
_MD5_CHUNK_SIZE = 1024 * 1024

# List of resources to be downloaded and installed. These are tuples with the
# following format:
# (basename, logging name, relative installation path, extraction filter)
_RESOURCES = [
  ('benchmark.zip', 'benchmark', '', None),
  ('binaries.zip', 'binaries', 'exe', None),
  ('symbols.zip', 'symbols', 'exe',
      lambda x: x.filename.endswith('.dll.pdb')),
  ('include.zip', 'include', 'include', None),
  ('lib.zip', 'library', 'lib', None)]


def _Shell(*cmd, **kw):
  """Runs |cmd|, returns the results from Popen(cmd).communicate().

  Any keyword arguments are forwarded to subprocess.Popen. Raises a
  RuntimeError if the command exits with a non-zero return code.
  """
  _LOGGER.debug('Executing %s.', cmd)
  # NOTE(review): |cmd| is handed to the shell (shell=True); callers must not
  # pass untrusted strings.
  prog = subprocess.Popen(cmd, shell=True, **kw)

  stdout, stderr = prog.communicate()
  if prog.returncode != 0:
    raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))
  return (stdout, stderr)


def _LoadState(output_dir):
  """Loads the contents of the state file for a given |output_dir|, returning
  None if it doesn't exist or can not be parsed.
  """
  path = os.path.join(output_dir, _STATE)
  if not os.path.exists(path):
    _LOGGER.debug('No state file found.')
    return None
  with open(path, 'rb') as f:
    _LOGGER.debug('Reading state file: %s', path)
    try:
      # Decode explicitly so that parsing behaves the same on Python 2 and 3.
      # A decoding failure is a ValueError too, so it is handled below.
      return json.loads(f.read().decode('utf-8'))
    except ValueError:
      _LOGGER.debug('Invalid state file.')
      return None


def _SaveState(output_dir, state, dry_run=False):
  """Saves the |state| dictionary to the given |output_dir| as a JSON file.

  Nothing is written if |dry_run| is true.
  """
  path = os.path.join(output_dir, _STATE)
  _LOGGER.debug('Writing state file: %s', path)
  if dry_run:
    return
  serialized = json.dumps(state, sort_keys=True, indent=2)
  with open(path, 'wb') as f:
    # json.dumps escapes non-ASCII characters by default, so this encoding is
    # lossless and yields bytes suitable for a binary-mode file.
    f.write(serialized.encode('utf-8'))


def _Md5(path):
  """Returns the MD5 hash of the file at |path|, which must exist."""
  digest = hashlib.md5()
  with open(path, 'rb') as f:
    # Hash in chunks so that large binaries are never held in memory whole.
    for chunk in iter(lambda: f.read(_MD5_CHUNK_SIZE), b''):
      digest.update(chunk)
  return digest.hexdigest()


def _StateIsValid(state):
  """Returns true if the given state structure is valid.

  A valid state is a dict with a 'revision' string matching _REVISION_RE and a
  'contents' dict mapping non-empty path strings to MD5 hex digests.
  """
  if not isinstance(state, dict):
    _LOGGER.debug('State must be a dict.')
    return False
  r = state.get('revision', None)
  if not isinstance(r, _STRING_TYPES) or not _REVISION_RE.match(r):
    _LOGGER.debug('State contains an invalid revision.')
    return False
  c = state.get('contents', None)
  if not isinstance(c, dict):
    _LOGGER.debug('State must contain a contents dict.')
    return False
  for (relpath, md5) in c.items():
    if not isinstance(relpath, _STRING_TYPES) or not relpath:
      _LOGGER.debug('State contents dict contains an invalid path.')
      return False
    if not isinstance(md5, _STRING_TYPES) or not _MD5_RE.match(md5):
      _LOGGER.debug('State contents dict contains an invalid MD5 digest.')
      return False
  return True


def _BuildActualState(stored, revision, output_dir):
  """Builds the actual state using the provided |stored| state as a template.
  Only examines files listed in the stored state, causing the script to ignore
  files that have been added to the directories locally. |stored| must be a
  valid state dictionary. Listed files that no longer exist are simply left
  out of the returned state.
  """
  contents = {}
  state = {'revision': revision, 'contents': contents}
  for relpath in stored['contents']:
    abspath = os.path.abspath(os.path.join(output_dir, relpath))
    if os.path.isfile(abspath):
      contents[relpath] = _Md5(abspath)

  return state


def _StatesAreConsistent(stored, actual):
  """Validates whether two state dictionaries are consistent. Both must be valid
  state dictionaries. Additional entries in |actual| are ignored.
  """
  if stored['revision'] != actual['revision']:
    _LOGGER.debug('Mismatched revision number.')
    return False
  cont_stored = stored['contents']
  cont_actual = actual['contents']
  for relpath, md5 in cont_stored.items():
    if relpath not in cont_actual:
      _LOGGER.debug('Missing content: %s', relpath)
      return False
    if md5 != cont_actual[relpath]:
      _LOGGER.debug('Modified content: %s', relpath)
      return False
  return True


def _GetCurrentState(revision, output_dir):
  """Loads the current state and checks to see if it is consistent. Returns
  a tuple (state, bool). The returned state will always be valid, even if an
  invalid state is present on disk.
  """
  stored = _LoadState(output_dir)
  if not _StateIsValid(stored):
    _LOGGER.debug('State is invalid.')
    # Return a valid but empty state.
    return ({'revision': '0', 'contents': {}}, False)
  # If the script has been modified consider the state invalid. This is
  # checked first because it is far cheaper than hashing the installed files.
  path = os.path.join(output_dir, _STATE)
  if os.path.getmtime(__file__) > os.path.getmtime(path):
    return (stored, False)
  # Otherwise, explicitly validate the state.
  actual = _BuildActualState(stored, revision, output_dir)
  if not _StatesAreConsistent(stored, actual):
    return (stored, False)
  return (stored, True)


def _DirIsEmpty(path):
  """Returns true if the given directory is empty, false otherwise.

  Also returns false if os.walk yields nothing for |path|, e.g. because it
  does not exist.
  """
  for _, dirs, files in os.walk(path):
    # Only the top-level listing matters, so return on the first iteration.
    return not dirs and not files
  return False


def _RmTreeHandleReadOnly(func, path, exc):
  """An error handling function for use with shutil.rmtree. This will
  detect failures to remove read-only files, and will change their properties
  prior to removing them. This is necessary on Windows as os.remove will return
  an access error for read-only files, and git repos contain read-only
  pack/index files.
  """
  excvalue = exc[1]
  # Depending on the Python version rmtree reports file removal failures
  # against either os.remove or os.unlink, so accept both.
  if (func in (os.rmdir, os.remove, os.unlink) and
      excvalue.errno == errno.EACCES):
    _LOGGER.debug('Removing read-only path: %s', path)
    os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    func(path)
  else:
    # Not a failure that can be fixed here: propagate the error that rmtree is
    # currently handling.
    raise


def _RmTree(path):
  """A wrapper of shutil.rmtree that handles read-only files."""
  shutil.rmtree(path, ignore_errors=False, onerror=_RmTreeHandleReadOnly)


def _CleanState(output_dir, state, dry_run=False):
  """Cleans up files/directories in |output_dir| that are referenced by
  the given |state|. Raises an error if there are local changes. Returns a
  dictionary of files that were deleted (or that would have been deleted, if
  |dry_run| is true).
  """
  _LOGGER.debug('Deleting files from previous installation.')
  deleted = {}
  contents = state['contents']

  # Try to delete the files. Keep track of directories to delete as well.
  dirs = set()
  for relpath in sorted(contents):
    fullpath = os.path.join(output_dir, relpath)
    dirs.add(os.path.dirname(fullpath))
    if not os.path.exists(fullpath):
      continue

    # If somehow the file has become a directory complain about it.
    if os.path.isdir(fullpath):
      raise Exception('Directory exists where file expected: %s' % fullpath)

    # Double check that the file doesn't have local changes. If it does
    # then refuse to delete it.
    if _Md5(fullpath) != contents[relpath]:
      raise Exception('File has local changes: %s' % fullpath)

    # The file is unchanged so it can safely be deleted.
    _LOGGER.debug('Deleting file "%s".', fullpath)
    deleted[relpath] = True
    if not dry_run:
      os.unlink(fullpath)

  # Sort directories from longest name to shortest. This lets us remove empty
  # directories from the most nested paths first.
  for p in sorted(dirs, key=len, reverse=True):
    if os.path.exists(p) and _DirIsEmpty(p):
      _LOGGER.debug('Deleting empty directory "%s".', p)
      if not dry_run:
        _RmTree(p)

  return deleted


def _Download(url):
  """Downloads the given URL and returns the contents as a string.

  Raises a RuntimeError if the response status is not 200.
  """
  with contextlib.closing(urllib2.urlopen(url)) as response:
    if response.getcode() != 200:
      raise RuntimeError('Failed to download "%s".' % url)
    return response.read()


def _ExtractArchive(archive, fulldir, filt, options, deleted, contents):
  """Extracts the file entries of the zipfile |archive| into |fulldir|.

  Only entries accepted by |filt| are extracted (all of them if |filt| is
  None). The MD5 of every extracted file is recorded in |contents|, keyed by
  its path relative to |options.output_dir|. Raises an Exception if an entry
  would be written outside of |fulldir| or would overwrite an existing path.
  In a dry-run nothing is extracted and |contents| is left untouched.
  """
  # In a dry-run with --overwrite the output directory has not really been
  # wiped, so every existing path has to be treated as already deleted.
  wiped = options.dry_run and getattr(options, 'overwrite', False)
  for entry in archive.infolist():
    # Directory entries have no content to hash; the directories themselves
    # are created by ZipFile.extract when the files inside them are extracted.
    if entry.filename.endswith('/'):
      continue
    if filt and not filt(entry):
      continue

    fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
    relpath = os.path.relpath(fullpath, options.output_dir)
    # Refuse entries that resolve to somewhere outside of |fulldir|.
    if os.path.relpath(fullpath, fulldir).split(os.sep)[0] == os.pardir:
      raise Exception('Archive entry escapes the installation directory: %s' %
                      entry.filename)

    if os.path.exists(fullpath):
      # If in a dry-run take into account the fact that the file *would*
      # have been deleted.
      would_be_deleted = options.dry_run and (wiped or relpath in deleted)
      if not would_be_deleted:
        raise Exception('Path already exists: %s' % fullpath)

    # Extract the file and update the state dictionary.
    _LOGGER.debug('Extracting "%s".', fullpath)
    if not options.dry_run:
      archive.extract(entry.filename, fulldir)
      contents[relpath] = _Md5(fullpath)
      if sys.platform == 'cygwin':
        os.chmod(fullpath, os.stat(fullpath).st_mode | stat.S_IXUSR)


def _InstallBinaries(options, deleted=None):
  """Installs Syzygy binaries. This assumes that the output directory has
  already been cleaned, as it will refuse to overwrite existing files.

  |deleted| holds the relative paths removed (or, in a dry-run, that would
  have been removed) while cleaning. Returns the state dictionary describing
  the new installation.
  """
  if deleted is None:
    deleted = {}
  contents = {}
  state = {'revision': options.revision, 'contents': contents}
  archive_url = _SYZYGY_ARCHIVE_URL % {'revision': options.revision}
  for (base, name, subdir, filt) in _RESOURCES:
    # Create the output directory if it doesn't exist.
    fulldir = os.path.join(options.output_dir, subdir)
    if os.path.isfile(fulldir):
      raise Exception('File exists where a directory needs to be created: %s' %
                      fulldir)
    if not os.path.exists(fulldir):
      _LOGGER.debug('Creating directory: %s', fulldir)
      if not options.dry_run:
        os.makedirs(fulldir)

    # Download the archive.
    url = archive_url + '/' + base
    _LOGGER.debug('Retrieving %s archive at "%s".', name, url)
    data = _Download(url)

    _LOGGER.debug('Unzipping %s archive.', name)
    with contextlib.closing(zipfile.ZipFile(io.BytesIO(data))) as archive:
      _ExtractArchive(archive, fulldir, filt, options, deleted, contents)

  return state


def _ParseCommandLine():
  """Parses the command-line and returns an options structure."""
  option_parser = optparse.OptionParser()
  option_parser.add_option('--dry-run', action='store_true', default=False,
      help='If true then will simply list actions that would be performed.')
  option_parser.add_option('--force', action='store_true', default=False,
      help='Force an installation even if the binaries are up to date.')
  option_parser.add_option('--output-dir', type='string',
      help='The path where the binaries will be replaced. Existing binaries '
           'will only be overwritten if not up to date.')
  option_parser.add_option('--overwrite', action='store_true', default=False,
      help='If specified then the installation will happily delete and rewrite '
           'the entire output directory, blasting any local changes.')
  option_parser.add_option('--revision', type='string',
      help='The SVN revision or GIT hash associated with the required version.')
  option_parser.add_option('--revision-file', type='string',
      help='A text file containing an SVN revision or GIT hash.')
  option_parser.add_option('--verbose', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.DEBUG,
      help='Enables verbose logging.')
  option_parser.add_option('--quiet', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.ERROR,
      help='Disables all output except for errors.')
  options, args = option_parser.parse_args()
  if args:
    option_parser.error('Unexpected arguments: %s' % args)
  if not options.output_dir:
    option_parser.error('Must specify --output-dir.')
  if not options.revision and not options.revision_file:
    option_parser.error('Must specify one of --revision or --revision-file.')
  if options.revision and options.revision_file:
    option_parser.error('Must not specify both --revision and --revision-file.')

  # Configure logging.
  logging.basicConfig(level=options.log_level)

  # If a revision file has been specified then read it. It is read as text so
  # that the revision can be matched against _REVISION_RE on Python 3 as well.
  if options.revision_file:
    with open(options.revision_file, 'r') as f:
      options.revision = f.read().strip()
    _LOGGER.debug('Parsed revision "%s" from file "%s".',
                  options.revision, options.revision_file)

  # Ensure that the specified SVN revision or GIT hash is valid.
  if not _REVISION_RE.match(options.revision):
    option_parser.error('Must specify a valid SVN or GIT revision.')

  # This just makes output prettier to read.
  options.output_dir = os.path.normpath(options.output_dir)

  return options


def main():
  """Brings the output directory up to date with the requested revision."""
  # We only care about Windows platforms, as the Syzygy binaries aren't used
  # elsewhere.
  if sys.platform not in ('win32', 'cygwin'):
    return

  options = _ParseCommandLine()

  if options.dry_run:
    _LOGGER.debug('Performing a dry-run.')

  # Load the current installation state, and validate it against the
  # requested installation.
  state, is_consistent = _GetCurrentState(options.revision, options.output_dir)

  # Decide whether or not an install is necessary.
  if options.force:
    _LOGGER.debug('Forcing reinstall of binaries.')
  elif is_consistent:
    # Avoid doing any work if the contents of the directory are consistent.
    _LOGGER.debug('State unchanged, no reinstall necessary.')
    return

  # Under normal logging this is the only message that will be reported.
  _LOGGER.info('Installing revision %s Syzygy binaries.',
               options.revision[0:12])

  # Clean up the old state to begin with. |deleted| has the same shape as the
  # dictionary returned by _CleanState.
  deleted = {}
  if options.overwrite:
    if os.path.exists(options.output_dir):
      # If overwrite was specified then take a heavy-handed approach.
      _LOGGER.debug('Deleting entire installation directory.')
      if not options.dry_run:
        _RmTree(options.output_dir)
  else:
    # Otherwise only delete things that the previous installation put in place,
    # and take care to preserve any local changes.
    deleted = _CleanState(options.output_dir, state, options.dry_run)

  # Install the new binaries. In a dry-run this will actually download the
  # archives, but it won't write anything to disk.
  state = _InstallBinaries(options, deleted)

  # Build and save the state for the directory.
  _SaveState(options.output_dir, state, options.dry_run)


if __name__ == '__main__':
  main()