# Copyright (c) 2015, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""Extracts archives."""


import hashlib
import optparse
import os
import os.path
import tarfile
import shutil
import sys
import zipfile


def CheckedJoin(output, path):
  """
  CheckedJoin returns os.path.join(output, path). It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  The path is normalized first. A ValueError carrying the normalized path is
  raised if it is absolute or begins with '.', which rejects '.', any '..'
  escape, and also any path whose first component starts with a dot.
  """
  path = os.path.normpath(path)
  if os.path.isabs(path) or path.startswith('.'):
    raise ValueError(path)
  return os.path.join(output, path)


class FileEntry(object):
  """
  FileEntry describes a regular file in an archive: its path inside the
  archive, its permission bits (or None to leave them alone), and a readable
  file object with its contents.
  """

  def __init__(self, path, mode, fileobj):
    self.path = path
    self.mode = mode
    self.fileobj = fileobj


class SymlinkEntry(object):
  """
  SymlinkEntry describes a symbolic link in an archive: its path inside the
  archive, its permission bits (or None to leave them alone), and the link
  target.
  """

  def __init__(self, path, mode, target):
    self.path = path
    self.mode = mode
    self.target = target


def IterateZip(path):
  """
  IterateZip opens the zip file at path and returns a generator of entry objects
  for each file in it.

  Names ending in '/' (directories) are skipped. Every yielded FileEntry has a
  mode of None. The zip file stays open until the generator is exhausted or
  closed.
  """
  with zipfile.ZipFile(path, 'r') as zip_file:
    for info in zip_file.infolist():
      if info.filename.endswith('/'):
        continue
      yield FileEntry(info.filename, None, zip_file.open(info))


def IterateTar(path, compression):
  """
  IterateTar opens the tar.gz or tar.bz2 file at path and returns a generator of
  entry objects for each file in it.

  compression is the suffix appended to the 'r:' tarfile mode ('gz' or 'bz2').
  Directories are skipped, symlinks are yielded as SymlinkEntry (mode None) and
  regular files as FileEntry with the mode recorded in the archive. Any other
  member type raises ValueError.
  """
  with tarfile.open(path, 'r:' + compression) as tar_file:
    for info in tar_file:
      if info.isdir():
        pass
      elif info.issym():
        yield SymlinkEntry(info.name, None, info.linkname)
      elif info.isfile():
        yield FileEntry(info.name, info.mode, tar_file.extractfile(info))
      else:
        raise ValueError('Unknown entry type "%s"' % (info.name, ))


def _Sha256OfFile(path):
  """
  _Sha256OfFile returns the hex SHA-256 digest of the file at path, reading it
  in 1 MiB chunks.
  """
  sha256 = hashlib.sha256()
  # Binary mode is required: text mode mangles archive bytes on Windows and
  # fails to decode them on Python 3, giving a wrong digest or an exception.
  with open(path, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha256.update(chunk)
  return sha256.hexdigest()


def main(args):
  """
  main extracts ARCHIVE into OUTPUT, replacing any existing OUTPUT directory.

  args is the command line without the program name. Unless --no-prefix is
  given, every entry must share a single leading path component, which is
  stripped. A digest stamp is written into OUTPUT so that a later run with an
  unchanged archive does nothing.

  Returns 0 on success (including when the archive is missing or already
  extracted) and 1 on a usage error. Raises ValueError for an unsupported
  archive extension or a malformed entry path.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  digest = _Sha256OfFile(archive)

  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive, 'gz')
  elif archive.endswith('.tar.bz2'):
    entries = IterateTar(archive, 'bz2')
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for entry in entries:
      # Even on Windows, zip files must always use forward slashes.
      if '\\' in entry.path or entry.path.startswith('/'):
        raise ValueError(entry.path)

      if not options.no_prefix:
        # An entry with no directory component has no prefix to strip; report
        # the offending path rather than an opaque unpacking error.
        if '/' not in entry.path:
          raise ValueError(entry.path)
        new_prefix, rest = entry.path.split('/', 1)

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))
      else:
        rest = entry.path

      # Extract the file into the output directory.
      fixed_path = CheckedJoin(output, rest)
      if not os.path.isdir(os.path.dirname(fixed_path)):
        os.makedirs(os.path.dirname(fixed_path))
      if isinstance(entry, FileEntry):
        try:
          with open(fixed_path, 'wb') as out:
            shutil.copyfileobj(entry.fileobj, out)
        finally:
          # Release the per-entry reader promptly instead of leaking one
          # handle per extracted file.
          entry.fileobj.close()
      elif isinstance(entry, SymlinkEntry):
        os.symlink(entry.target, fixed_path)
      else:
        raise TypeError('unknown entry type')

      # Fix up permissions if needbe.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if entry.mode is not None:
        os.chmod(fixed_path, entry.mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    # Closing the generator closes the underlying archive.
    entries.close()

  # An archive with no files never creates the output directory; make sure it
  # exists so the stamp can be written.
  if not os.path.isdir(output):
    os.makedirs(output)
  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))