# (code-search navigation header removed: Home | History | Annotate | Download | only in bin)
      1 #!/usr/bin/python
      2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """A library to assist automatically downloading files.
      7 
      8 This library is used by scripts that download tarballs, zipfiles, etc. as part
      9 of the build process.
     10 """
     11 
     12 import hashlib
     13 import http_download
     14 import os.path
     15 import re
     16 import shutil
     17 import sys
     18 import time
     19 import urllib2
     20 
# Names of the stamp files used to record, respectively, the URL a file was
# downloaded from and the SHA1 hash of its contents.
SOURCE_STAMP = 'SOURCE_URL'
HASH_STAMP = 'SOURCE_SHA1'


# Designed to handle more general inputs than sys.platform because the platform
# name may come from the command line.
PLATFORM_COLLAPSE = {
    'windows': 'windows',
    'win32': 'windows',
    'cygwin': 'windows',
    'linux': 'linux',
    'linux2': 'linux',
    'linux3': 'linux',
    'darwin': 'mac',
    'mac': 'mac',
}

# Collapses machine names (as reported by platform.machine() or supplied on
# the command line) onto canonical architecture family names.
ARCH_COLLAPSE = {
    'i386'  : 'x86',
    'i686'  : 'x86',
    'x86_64': 'x86',
    'armv7l': 'arm',
}
     44 
     45 
     46 class HashError(Exception):
     47   def __init__(self, download_url, expected_hash, actual_hash):
     48     self.download_url = download_url
     49     self.expected_hash = expected_hash
     50     self.actual_hash = actual_hash
     51 
     52   def __str__(self):
     53     return 'Got hash "%s" but expected hash "%s" for "%s"' % (
     54         self.actual_hash, self.expected_hash, self.download_url)
     55 
     56 
     57 def PlatformName(name=None):
     58   if name is None:
     59     name = sys.platform
     60   return PLATFORM_COLLAPSE[name]
     61 
     62 def ArchName(name=None):
     63   if name is None:
     64     if PlatformName() == 'windows':
     65       # TODO(pdox): Figure out how to auto-detect 32-bit vs 64-bit Windows.
     66       name = 'i386'
     67     else:
     68       import platform
     69       name = platform.machine()
     70   return ARCH_COLLAPSE[name]
     71 
     72 def EnsureFileCanBeWritten(filename):
     73   directory = os.path.dirname(filename)
     74   if not os.path.exists(directory):
     75     os.makedirs(directory)
     76 
     77 
     78 def WriteData(filename, data):
     79   EnsureFileCanBeWritten(filename)
     80   f = open(filename, 'wb')
     81   f.write(data)
     82   f.close()
     83 
     84 
     85 def WriteDataFromStream(filename, stream, chunk_size, verbose=True):
     86   EnsureFileCanBeWritten(filename)
     87   dst = open(filename, 'wb')
     88   try:
     89     while True:
     90       data = stream.read(chunk_size)
     91       if len(data) == 0:
     92         break
     93       dst.write(data)
     94       if verbose:
     95         # Indicate that we're still writing.
     96         sys.stdout.write('.')
     97         sys.stdout.flush()
     98   finally:
     99     if verbose:
    100       sys.stdout.write('\n')
    101     dst.close()
    102 
    103 
    104 def DoesStampMatch(stampfile, expected, index):
    105   try:
    106     f = open(stampfile, 'r')
    107     stamp = f.read()
    108     f.close()
    109     if stamp.split('\n')[index] == expected:
    110       return "already up-to-date."
    111     elif stamp.startswith('manual'):
    112       return "manual override."
    113     return False
    114   except IOError:
    115     return False
    116 
    117 
    118 def WriteStamp(stampfile, data):
    119   EnsureFileCanBeWritten(stampfile)
    120   f = open(stampfile, 'w')
    121   f.write(data)
    122   f.close()
    123 
    124 
    125 def StampIsCurrent(path, stamp_name, stamp_contents, min_time=None, index=0):
    126   stampfile = os.path.join(path, stamp_name)
    127 
    128   # Check if the stampfile is older than the minimum last mod time
    129   if min_time:
    130     try:
    131       stamp_time = os.stat(stampfile).st_mtime
    132       if stamp_time <= min_time:
    133         return False
    134     except OSError:
    135       return False
    136 
    137   return DoesStampMatch(stampfile, stamp_contents, index)
    138 
    139 
    140 def WriteSourceStamp(path, url):
    141   stampfile = os.path.join(path, SOURCE_STAMP)
    142   WriteStamp(stampfile, url)
    143 
    144 def WriteHashStamp(path, hash_val):
    145   hash_stampfile = os.path.join(path, HASH_STAMP)
    146   WriteStamp(hash_stampfile, hash_val)
    147 
    148 
    149 def Retry(op, *args):
    150   # Windows seems to be prone to having commands that delete files or
    151   # directories fail.  We currently do not have a complete understanding why,
    152   # and as a workaround we simply retry the command a few times.
    153   # It appears that file locks are hanging around longer than they should.  This
    154   # may be a secondary effect of processes hanging around longer than they
    155   # should.  This may be because when we kill a browser sel_ldr does not exit
    156   # immediately, etc.
    157   # Virus checkers can also accidently prevent files from being deleted, but
    158   # that shouldn't be a problem on the bots.
    159   if sys.platform in ('win32', 'cygwin'):
    160     count = 0
    161     while True:
    162       try:
    163         op(*args)
    164         break
    165       except Exception:
    166         sys.stdout.write("FAILED: %s %s\n" % (op.__name__, repr(args)))
    167         count += 1
    168         if count < 5:
    169           sys.stdout.write("RETRY: %s %s\n" % (op.__name__, repr(args)))
    170           time.sleep(pow(2, count))
    171         else:
    172           # Don't mask the exception.
    173           raise
    174   else:
    175     op(*args)
    176 
    177 
def MoveDirCleanly(src, dst):
  """Move directory |src| to |dst|, deleting any pre-existing |dst| first."""
  RemoveDir(dst)
  MoveDir(src, dst)
    181 
    182 
def MoveDir(src, dst):
  """Move |src| to |dst|, retried on Windows (see Retry)."""
  Retry(shutil.move, src, dst)
    185 
    186 
    187 def RemoveDir(path):
    188   if os.path.exists(path):
    189     Retry(shutil.rmtree, path)
    190 
    191 
    192 def RemoveFile(path):
    193   if os.path.exists(path):
    194     Retry(os.unlink, path)
    195 
    196 
    197 def _HashFileHandle(fh):
    198   """sha1 of a file like object.
    199 
    200   Arguments:
    201     fh: file handle like object to hash.
    202   Returns:
    203     sha1 as a string.
    204   """
    205   hasher = hashlib.sha1()
    206   try:
    207     while True:
    208       data = fh.read(4096)
    209       if not data:
    210         break
    211       hasher.update(data)
    212   finally:
    213     fh.close()
    214   return hasher.hexdigest()
    215 
    216 
    217 def HashFile(filename):
    218   """sha1 a file on disk.
    219 
    220   Arguments:
    221     filename: filename to hash.
    222   Returns:
    223     sha1 as a string.
    224   """
    225   fh = open(filename, 'rb')
    226   return _HashFileHandle(fh)
    227 
    228 
    229 def HashUrlByDownloading(url):
    230   """sha1 the data at an url.
    231 
    232   Arguments:
    233     url: url to download from.
    234   Returns:
    235     sha1 of the data at the url.
    236   """
    237   try:
    238     fh = urllib2.urlopen(url)
    239   except:
    240     sys.stderr.write("Failed fetching URL: %s\n" % url)
    241     raise
    242   return _HashFileHandle(fh)
    243 
    244 
# Attempts to get the SHA1 hash of a file given a URL by looking for
# an adjacent file with a ".sha1hash" suffix.  This saves having to
# download a large tarball just to get its hash.  Otherwise, we fall
# back to downloading the main file.
def HashUrl(url):
  """Return the SHA1 hex digest of the data at |url|.

  First tries to fetch an adjacent '<url>.sha1hash' file; on HTTP 404 the
  main file itself is downloaded and hashed instead.

  Raises:
    AssertionError: if the .sha1hash file exists but is malformed.
    urllib2.HTTPError: for HTTP failures other than 404 on the hash file.
  """
  hash_url = '%s.sha1hash' % url
  try:
    fh = urllib2.urlopen(hash_url)
    # A 40-char SHA1 hex digest plus optional newline fits well in 100 bytes.
    data = fh.read(100)
    fh.close()
  except urllib2.HTTPError, exn:
    if exn.code == 404:
      # No precomputed hash available; fall back to hashing the download.
      return HashUrlByDownloading(url)
    raise
  else:
    if not re.match('[0-9a-f]{40}\n?$', data):
      raise AssertionError('Bad SHA1 hash file: %r' % data)
    return data.strip()
    263 
    264 
def SyncURL(url, filename=None, stamp_dir=None, min_time=None,
            hash_val=None, keep=False, verbose=False, stamp_index=0):
  """Synchronize a destination file with a URL

  if the URL does not match the URL stamp, then we must re-download it.

  Arguments:
    url: the url which will to compare against and download
    filename: the file to create on download
    stamp_dir: the directory containing the URL/hash stamps to check against
    min_time: if set, stamp files modified at or before this time are stale
    hash_val: if set, the expected hash which must be matched
    keep: if True the downloaded file is meant to be kept, so it must
      already exist locally for the stamps alone to prove it up to date
    verbose: prints out status as it runs
    stamp_index: index within the stamp file to check.
  Returns:
    True if the file is replaced
    False if the file is not replaced
  Exception:
    HashError: if the hash does not match
  """

  assert url and filename

  # If we are not keeping the tarball, or we already have it, we can
  # skip downloading it for this reason. If we are keeping it,
  # it must exist.
  if keep:
    tarball_ok = os.path.isfile(filename)
  else:
    tarball_ok = True

  # If we don't need the tarball and the stamp_file matches the url, then
  # we must be up to date.  If the URL differs but the recorded hash matches
  # the one we'll insist the tarball has, then that's good enough too.
  # TODO(mcgrathr): Download the .sha1sum file first to compare with
  # the cached hash, in case --file-hash options weren't used.
  if tarball_ok and stamp_dir is not None:
    if StampIsCurrent(stamp_dir, SOURCE_STAMP, url, min_time):
      if verbose:
        print '%s is already up to date.' % filename
      return False
    if (hash_val is not None and
        StampIsCurrent(stamp_dir, HASH_STAMP, hash_val, min_time, stamp_index)):
      if verbose:
        print '%s is identical to the up to date file.' % filename
      return False

  if verbose:
    print 'Updating %s\n\tfrom %s.' % (filename, url)
  EnsureFileCanBeWritten(filename)
  http_download.HttpDownload(url, filename)

  # Verify the download against the expected hash, if one was supplied.
  if hash_val:
    tar_hash = HashFile(filename)
    if hash_val != tar_hash:
      raise HashError(actual_hash=tar_hash, expected_hash=hash_val,
                      download_url=url)

  return True
    324