Home | History | Annotate | Download | only in bot
      1 # Copyright (c) 2015, Google Inc.
      2 #
      3 # Permission to use, copy, modify, and/or distribute this software for any
      4 # purpose with or without fee is hereby granted, provided that the above
      5 # copyright notice and this permission notice appear in all copies.
      6 #
      7 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      8 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      9 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     10 # SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     11 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     12 # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     13 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     14 
     15 """Extracts archives."""
     16 
     17 
     18 import hashlib
     19 import optparse
     20 import os
     21 import os.path
     22 import tarfile
     23 import shutil
     24 import sys
     25 import zipfile
     26 
     27 
     28 def CheckedJoin(output, path):
     29   """
     30   CheckedJoin returns os.path.join(output, path). It does sanity checks to
     31   ensure the resulting path is under output, but shouldn't be used on untrusted
     32   input.
     33   """
     34   path = os.path.normpath(path)
     35   if os.path.isabs(path) or path.startswith('.'):
     36     raise ValueError(path)
     37   return os.path.join(output, path)
     38 
     39 
     40 class FileEntry(object):
     41   def __init__(self, path, mode, fileobj):
     42     self.path = path
     43     self.mode = mode
     44     self.fileobj = fileobj
     45 
     46 
     47 class SymlinkEntry(object):
     48   def __init__(self, path, mode, target):
     49     self.path = path
     50     self.mode = mode
     51     self.target = target
     52 
     53 
     54 def IterateZip(path):
     55   """
     56   IterateZip opens the zip file at path and returns a generator of entry objects
     57   for each file in it.
     58   """
     59   with zipfile.ZipFile(path, 'r') as zip_file:
     60     for info in zip_file.infolist():
     61       if info.filename.endswith('/'):
     62         continue
     63       yield FileEntry(info.filename, None, zip_file.open(info))
     64 
     65 
     66 def IterateTar(path, compression):
     67   """
     68   IterateTar opens the tar.gz or tar.bz2 file at path and returns a generator of
     69   entry objects for each file in it.
     70   """
     71   with tarfile.open(path, 'r:' + compression) as tar_file:
     72     for info in tar_file:
     73       if info.isdir():
     74         pass
     75       elif info.issym():
     76         yield SymlinkEntry(info.name, None, info.linkname)
     77       elif info.isfile():
     78         yield FileEntry(info.name, info.mode, tar_file.extractfile(info))
     79       else:
     80         raise ValueError('Unknown entry type "%s"' % (info.name, ))
     81 
     82 
     83 def main(args):
     84   parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
     85   parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
     86                     help='Do not remove a prefix from paths in the archive.')
     87   options, args = parser.parse_args(args)
     88 
     89   if len(args) != 2:
     90     parser.print_help()
     91     return 1
     92 
     93   archive, output = args
     94 
     95   if not os.path.exists(archive):
     96     # Skip archives that weren't downloaded.
     97     return 0
     98 
     99   with open(archive) as f:
    100     sha256 = hashlib.sha256()
    101     while True:
    102       chunk = f.read(1024 * 1024)
    103       if not chunk:
    104         break
    105       sha256.update(chunk)
    106     digest = sha256.hexdigest()
    107 
    108   stamp_path = os.path.join(output, ".boringssl_archive_digest")
    109   if os.path.exists(stamp_path):
    110     with open(stamp_path) as f:
    111       if f.read().strip() == digest:
    112         print "Already up-to-date."
    113         return 0
    114 
    115   if archive.endswith('.zip'):
    116     entries = IterateZip(archive)
    117   elif archive.endswith('.tar.gz'):
    118     entries = IterateTar(archive, 'gz')
    119   elif archive.endswith('.tar.bz2'):
    120     entries = IterateTar(archive, 'bz2')
    121   else:
    122     raise ValueError(archive)
    123 
    124   try:
    125     if os.path.exists(output):
    126       print "Removing %s" % (output, )
    127       shutil.rmtree(output)
    128 
    129     print "Extracting %s to %s" % (archive, output)
    130     prefix = None
    131     num_extracted = 0
    132     for entry in entries:
    133       # Even on Windows, zip files must always use forward slashes.
    134       if '\\' in entry.path or entry.path.startswith('/'):
    135         raise ValueError(entry.path)
    136 
    137       if not options.no_prefix:
    138         new_prefix, rest = entry.path.split('/', 1)
    139 
    140         # Ensure the archive is consistent.
    141         if prefix is None:
    142           prefix = new_prefix
    143         if prefix != new_prefix:
    144           raise ValueError((prefix, new_prefix))
    145       else:
    146         rest = entry.path
    147 
    148       # Extract the file into the output directory.
    149       fixed_path = CheckedJoin(output, rest)
    150       if not os.path.isdir(os.path.dirname(fixed_path)):
    151         os.makedirs(os.path.dirname(fixed_path))
    152       if isinstance(entry, FileEntry):
    153         with open(fixed_path, 'wb') as out:
    154           shutil.copyfileobj(entry.fileobj, out)
    155       elif isinstance(entry, SymlinkEntry):
    156         os.symlink(entry.target, fixed_path)
    157       else:
    158         raise TypeError('unknown entry type')
    159 
    160       # Fix up permissions if needbe.
    161       # TODO(davidben): To be extra tidy, this should only track the execute bit
    162       # as in git.
    163       if entry.mode is not None:
    164         os.chmod(fixed_path, entry.mode)
    165 
    166       # Print every 100 files, so bots do not time out on large archives.
    167       num_extracted += 1
    168       if num_extracted % 100 == 0:
    169         print "Extracted %d files..." % (num_extracted,)
    170   finally:
    171     entries.close()
    172 
    173   with open(stamp_path, 'w') as f:
    174     f.write(digest)
    175 
    176   print "Done. Extracted %d files." % (num_extracted,)
    177   return 0
    178 
    179 
    180 if __name__ == '__main__':
    181   sys.exit(main(sys.argv[1:]))
    182