1 """ 2 Read and write ZIP files. 3 """ 4 import struct, os, time, sys, shutil 5 import binascii, cStringIO, stat 6 import io 7 import re 8 import string 9 10 try: 11 import zlib # We may need its compression method 12 crc32 = zlib.crc32 13 except ImportError: 14 zlib = None 15 crc32 = binascii.crc32 16 17 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", 18 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] 19 20 class BadZipfile(Exception): 21 pass 22 23 24 class LargeZipFile(Exception): 25 """ 26 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 27 and those extensions are disabled. 28 """ 29 30 error = BadZipfile # The exception raised by this module 31 32 ZIP64_LIMIT = (1 << 31) - 1 33 ZIP_FILECOUNT_LIMIT = 1 << 16 34 ZIP_MAX_COMMENT = (1 << 16) - 1 35 36 # constants for Zip file compression methods 37 ZIP_STORED = 0 38 ZIP_DEFLATED = 8 39 # Other ZIP compression methods not supported 40 41 # Below are some formats and associated data for reading/writing headers using 42 # the struct module. The names and structures of headers/records are those used 43 # in the PKWARE description of the ZIP file format: 44 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT 45 # (URL valid as of January 2008) 46 47 # The "end of central directory" structure, magic number, size, and indices 48 # (section V.I in the format document) 49 structEndArchive = "<4s4H2LH" 50 stringEndArchive = "PK\005\006" 51 sizeEndCentDir = struct.calcsize(structEndArchive) 52 53 _ECD_SIGNATURE = 0 54 _ECD_DISK_NUMBER = 1 55 _ECD_DISK_START = 2 56 _ECD_ENTRIES_THIS_DISK = 3 57 _ECD_ENTRIES_TOTAL = 4 58 _ECD_SIZE = 5 59 _ECD_OFFSET = 6 60 _ECD_COMMENT_SIZE = 7 61 # These last two indices are not part of the structure as defined in the 62 # spec, but they are used internally by this module as a convenience 63 _ECD_COMMENT = 8 64 _ECD_LOCATION = 9 65 66 # The "central directory" structure, magic number, size, and indices 67 # of entries in the structure (section V.F in the format document) 68 structCentralDir = "<4s4B4HL2L5H2L" 69 stringCentralDir = "PK\001\002" 70 sizeCentralDir = struct.calcsize(structCentralDir) 71 72 # indexes of entries in the central directory structure 73 _CD_SIGNATURE = 0 74 _CD_CREATE_VERSION = 1 75 _CD_CREATE_SYSTEM = 2 76 _CD_EXTRACT_VERSION = 3 77 _CD_EXTRACT_SYSTEM = 4 78 _CD_FLAG_BITS = 5 79 _CD_COMPRESS_TYPE = 6 80 _CD_TIME = 7 81 _CD_DATE = 8 82 _CD_CRC = 9 83 _CD_COMPRESSED_SIZE = 10 84 _CD_UNCOMPRESSED_SIZE = 11 85 _CD_FILENAME_LENGTH = 12 86 _CD_EXTRA_FIELD_LENGTH = 13 87 _CD_COMMENT_LENGTH = 14 88 _CD_DISK_NUMBER_START = 15 89 _CD_INTERNAL_FILE_ATTRIBUTES = 16 90 _CD_EXTERNAL_FILE_ATTRIBUTES = 17 91 _CD_LOCAL_HEADER_OFFSET = 18 92 93 # The "local file header" structure, magic number, size, and indices 94 # (section V.A in the format document) 95 structFileHeader = "<4s2B4HL2L2H" 96 stringFileHeader = "PK\003\004" 97 sizeFileHeader = struct.calcsize(structFileHeader) 98 99 _FH_SIGNATURE = 0 100 _FH_EXTRACT_VERSION = 1 101 _FH_EXTRACT_SYSTEM = 2 102 _FH_GENERAL_PURPOSE_FLAG_BITS = 3 103 _FH_COMPRESSION_METHOD = 4 104 _FH_LAST_MOD_TIME = 5 105 _FH_LAST_MOD_DATE = 6 106 _FH_CRC = 7 107 _FH_COMPRESSED_SIZE = 8 108 _FH_UNCOMPRESSED_SIZE = 9 109 _FH_FILENAME_LENGTH = 10 110 _FH_EXTRA_FIELD_LENGTH = 11 111 112 # The "Zip64 end of central directory locator" structure, magic number, and size 113 structEndArchive64Locator = "<4sLQL" 114 stringEndArchive64Locator = "PK\x06\x07" 115 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 116 117 # The "Zip64 end of central directory" record, magic number, size, and indices 118 # (section V.G in the format document) 119 structEndArchive64 = "<4sQ2H2L4Q" 120 stringEndArchive64 = "PK\x06\x06" 121 sizeEndCentDir64 = struct.calcsize(structEndArchive64) 122 123 _CD64_SIGNATURE = 0 124 _CD64_DIRECTORY_RECSIZE = 1 125 _CD64_CREATE_VERSION = 2 126 _CD64_EXTRACT_VERSION = 3 127 _CD64_DISK_NUMBER = 4 128 _CD64_DISK_NUMBER_START = 5 129 _CD64_NUMBER_ENTRIES_THIS_DISK = 6 130 _CD64_NUMBER_ENTRIES_TOTAL = 7 131 _CD64_DIRECTORY_SIZE = 8 132 _CD64_OFFSET_START_CENTDIR = 9 133 134 def _check_zipfile(fp): 135 try: 136 if _EndRecData(fp): 137 return True # file has correct magic number 138 except IOError: 139 pass 140 return False 141 142 def is_zipfile(filename): 143 """Quickly see if a file is a ZIP file by checking the magic number. 144 145 The filename argument may be a file or file-like object too. 146 """ 147 result = False 148 try: 149 if hasattr(filename, "read"): 150 result = _check_zipfile(fp=filename) 151 else: 152 with open(filename, "rb") as fp: 153 result = _check_zipfile(fp) 154 except IOError: 155 pass 156 return result 157 158 def _EndRecData64(fpin, offset, endrec): 159 """ 160 Read the ZIP64 end-of-archive records and use that to update endrec 161 """ 162 try: 163 fpin.seek(offset - sizeEndCentDir64Locator, 2) 164 except IOError: 165 # If the seek fails, the file is not large enough to contain a ZIP64 166 # end-of-archive record, so just return the end record we were given. 167 return endrec 168 169 data = fpin.read(sizeEndCentDir64Locator) 170 if len(data) != sizeEndCentDir64Locator: 171 return endrec 172 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 173 if sig != stringEndArchive64Locator: 174 return endrec 175 176 if diskno != 0 or disks != 1: 177 raise BadZipfile("zipfiles that span multiple disks are not supported") 178 179 # Assume no 'zip64 extensible data' 180 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 181 data = fpin.read(sizeEndCentDir64) 182 if len(data) != sizeEndCentDir64: 183 return endrec 184 sig, sz, create_version, read_version, disk_num, disk_dir, \ 185 dircount, dircount2, dirsize, diroffset = \ 186 struct.unpack(structEndArchive64, data) 187 if sig != stringEndArchive64: 188 return endrec 189 190 # Update the original endrec using data from the ZIP64 record 191 endrec[_ECD_SIGNATURE] = sig 192 endrec[_ECD_DISK_NUMBER] = disk_num 193 endrec[_ECD_DISK_START] = disk_dir 194 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 195 endrec[_ECD_ENTRIES_TOTAL] = dircount2 196 endrec[_ECD_SIZE] = dirsize 197 endrec[_ECD_OFFSET] = diroffset 198 return endrec 199 200 201 def _EndRecData(fpin): 202 """Return data from the "End of Central Directory" record, or None. 203 204 The data is a list of the nine items in the ZIP "End of central dir" 205 record followed by a tenth item, the file seek offset of this record.""" 206 207 # Determine file size 208 fpin.seek(0, 2) 209 filesize = fpin.tell() 210 211 # Check to see if this is ZIP file with no archive comment (the 212 # "end of central directory" structure should be the last item in the 213 # file if this is the case). 214 try: 215 fpin.seek(-sizeEndCentDir, 2) 216 except IOError: 217 return None 218 data = fpin.read() 219 if (len(data) == sizeEndCentDir and 220 data[0:4] == stringEndArchive and 221 data[-2:] == b"\000\000"): 222 # the signature is correct and there's no comment, unpack structure 223 endrec = struct.unpack(structEndArchive, data) 224 endrec=list(endrec) 225 226 # Append a blank comment and record start offset 227 endrec.append("") 228 endrec.append(filesize - sizeEndCentDir) 229 230 # Try to read the "Zip64 end of central directory" structure 231 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 232 233 # Either this is not a ZIP file, or it is a ZIP file with an archive 234 # comment. Search the end of the file for the "end of central directory" 235 # record signature. The comment is the last item in the ZIP file and may be 236 # up to 64K long. It is assumed that the "end of central directory" magic 237 # number does not appear in the comment. 238 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 239 fpin.seek(maxCommentStart, 0) 240 data = fpin.read() 241 start = data.rfind(stringEndArchive) 242 if start >= 0: 243 # found the magic number; attempt to unpack and interpret 244 recData = data[start:start+sizeEndCentDir] 245 if len(recData) != sizeEndCentDir: 246 # Zip file is corrupted. 247 return None 248 endrec = list(struct.unpack(structEndArchive, recData)) 249 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 250 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 251 endrec.append(comment) 252 endrec.append(maxCommentStart + start) 253 254 # Try to read the "Zip64 end of central directory" structure 255 return _EndRecData64(fpin, maxCommentStart + start - filesize, 256 endrec) 257 258 # Unable to find a valid end of central directory structure 259 return None 260 261 262 class ZipInfo (object): 263 """Class with attributes describing each file in the ZIP archive.""" 264 265 __slots__ = ( 266 'orig_filename', 267 'filename', 268 'date_time', 269 'compress_type', 270 'comment', 271 'extra', 272 'create_system', 273 'create_version', 274 'extract_version', 275 'reserved', 276 'flag_bits', 277 'volume', 278 'internal_attr', 279 'external_attr', 280 'header_offset', 281 'CRC', 282 'compress_size', 283 'file_size', 284 '_raw_time', 285 ) 286 287 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 288 self.orig_filename = filename # Original file name in archive 289 290 # Terminate the file name at the first null byte. Null bytes in file 291 # names are used as tricks by viruses in archives. 292 null_byte = filename.find(chr(0)) 293 if null_byte >= 0: 294 filename = filename[0:null_byte] 295 # This is used to ensure paths in generated ZIP files always use 296 # forward slashes as the directory separator, as required by the 297 # ZIP format specification. 298 if os.sep != "/" and os.sep in filename: 299 filename = filename.replace(os.sep, "/") 300 301 self.filename = filename # Normalized file name 302 self.date_time = date_time # year, month, day, hour, min, sec 303 304 if date_time[0] < 1980: 305 raise ValueError('ZIP does not support timestamps before 1980') 306 307 # Standard values: 308 self.compress_type = ZIP_STORED # Type of compression for the file 309 self.comment = "" # Comment for each file 310 self.extra = "" # ZIP extra data 311 if sys.platform == 'win32': 312 self.create_system = 0 # System which created ZIP archive 313 else: 314 # Assume everything else is unix-y 315 self.create_system = 3 # System which created ZIP archive 316 self.create_version = 20 # Version which created ZIP archive 317 self.extract_version = 20 # Version needed to extract archive 318 self.reserved = 0 # Must be zero 319 self.flag_bits = 0 # ZIP flag bits 320 self.volume = 0 # Volume number of file header 321 self.internal_attr = 0 # Internal attributes 322 self.external_attr = 0 # External file attributes 323 # Other attributes are set by class ZipFile: 324 # header_offset Byte offset to the file header 325 # CRC CRC-32 of the uncompressed file 326 # compress_size Size of the compressed file 327 # file_size Size of the uncompressed file 328 329 def FileHeader(self, zip64=None): 330 """Return the per-file header as a string.""" 331 dt = self.date_time 332 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 333 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 334 if self.flag_bits & 0x08: 335 # Set these to zero because we write them after the file data 336 CRC = compress_size = file_size = 0 337 else: 338 CRC = self.CRC 339 compress_size = self.compress_size 340 file_size = self.file_size 341 342 extra = self.extra 343 344 if zip64 is None: 345 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 346 if zip64: 347 fmt = '<HHQQ' 348 extra = extra + struct.pack(fmt, 349 1, struct.calcsize(fmt)-4, file_size, compress_size) 350 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 351 if not zip64: 352 raise LargeZipFile("Filesize would require ZIP64 extensions") 353 # File is larger than what fits into a 4 byte integer, 354 # fall back to the ZIP64 extension 355 file_size = 0xffffffff 356 compress_size = 0xffffffff 357 self.extract_version = max(45, self.extract_version) 358 self.create_version = max(45, self.extract_version) 359 360 filename, flag_bits = self._encodeFilenameFlags() 361 header = struct.pack(structFileHeader, stringFileHeader, 362 self.extract_version, self.reserved, flag_bits, 363 self.compress_type, dostime, dosdate, CRC, 364 compress_size, file_size, 365 len(filename), len(extra)) 366 return header + filename + extra 367 368 def _encodeFilenameFlags(self): 369 if isinstance(self.filename, unicode): 370 try: 371 return self.filename.encode('ascii'), self.flag_bits 372 except UnicodeEncodeError: 373 return self.filename.encode('utf-8'), self.flag_bits | 0x800 374 else: 375 return self.filename, self.flag_bits 376 377 def _decodeFilename(self): 378 if self.flag_bits & 0x800: 379 return self.filename.decode('utf-8') 380 else: 381 return self.filename 382 383 def _decodeExtra(self): 384 # Try to decode the extra field. 385 extra = self.extra 386 unpack = struct.unpack 387 while extra: 388 tp, ln = unpack('<HH', extra[:4]) 389 if tp == 1: 390 if ln >= 24: 391 counts = unpack('<QQQ', extra[4:28]) 392 elif ln == 16: 393 counts = unpack('<QQ', extra[4:20]) 394 elif ln == 8: 395 counts = unpack('<Q', extra[4:12]) 396 elif ln == 0: 397 counts = () 398 else: 399 raise RuntimeError, "Corrupt extra field %s"%(ln,) 400 401 idx = 0 402 403 # ZIP64 extension (large files and/or large archives) 404 if self.file_size in (0xffffffffffffffffL, 0xffffffffL): 405 self.file_size = counts[idx] 406 idx += 1 407 408 if self.compress_size == 0xFFFFFFFFL: 409 self.compress_size = counts[idx] 410 idx += 1 411 412 if self.header_offset == 0xffffffffL: 413 old = self.header_offset 414 self.header_offset = counts[idx] 415 idx+=1 416 417 extra = extra[ln+4:] 418 419 420 class _ZipDecrypter: 421 """Class to handle decryption of files stored within a ZIP archive. 422 423 ZIP supports a password-based form of encryption. Even though known 424 plaintext attacks have been found against it, it is still useful 425 to be able to get data out of such a file. 426 427 Usage: 428 zd = _ZipDecrypter(mypwd) 429 plain_char = zd(cypher_char) 430 plain_text = map(zd, cypher_text) 431 """ 432 433 def _GenerateCRCTable(): 434 """Generate a CRC-32 table. 435 436 ZIP encryption uses the CRC32 one-byte primitive for scrambling some 437 internal keys. We noticed that a direct implementation is faster than 438 relying on binascii.crc32(). 439 """ 440 poly = 0xedb88320 441 table = [0] * 256 442 for i in range(256): 443 crc = i 444 for j in range(8): 445 if crc & 1: 446 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly 447 else: 448 crc = ((crc >> 1) & 0x7FFFFFFF) 449 table[i] = crc 450 return table 451 crctable = _GenerateCRCTable() 452 453 def _crc32(self, ch, crc): 454 """Compute the CRC32 primitive on one byte.""" 455 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] 456 457 def __init__(self, pwd): 458 self.key0 = 305419896 459 self.key1 = 591751049 460 self.key2 = 878082192 461 for p in pwd: 462 self._UpdateKeys(p) 463 464 def _UpdateKeys(self, c): 465 self.key0 = self._crc32(c, self.key0) 466 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 467 self.key1 = (self.key1 * 134775813 + 1) & 4294967295 468 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) 469 470 def __call__(self, c): 471 """Decrypt a single character.""" 472 c = ord(c) 473 k = self.key2 | 2 474 c = c ^ (((k * (k^1)) >> 8) & 255) 475 c = chr(c) 476 self._UpdateKeys(c) 477 return c 478 479 480 compressor_names = { 481 0: 'store', 482 1: 'shrink', 483 2: 'reduce', 484 3: 'reduce', 485 4: 'reduce', 486 5: 'reduce', 487 6: 'implode', 488 7: 'tokenize', 489 8: 'deflate', 490 9: 'deflate64', 491 10: 'implode', 492 12: 'bzip2', 493 14: 'lzma', 494 18: 'terse', 495 19: 'lz77', 496 97: 'wavpack', 497 98: 'ppmd', 498 } 499 500 501 class ZipExtFile(io.BufferedIOBase): 502 """File-like object for reading an archive member. 503 Is returned by ZipFile.open(). 504 """ 505 506 # Max size supported by decompressor. 507 MAX_N = 1 << 31 - 1 508 509 # Read from compressed files in 4k blocks. 510 MIN_READ_SIZE = 4096 511 512 # Search for universal newlines or line chunks. 513 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') 514 515 def __init__(self, fileobj, mode, zipinfo, decrypter=None, 516 close_fileobj=False): 517 self._fileobj = fileobj 518 self._decrypter = decrypter 519 self._close_fileobj = close_fileobj 520 521 self._compress_type = zipinfo.compress_type 522 self._compress_size = zipinfo.compress_size 523 self._compress_left = zipinfo.compress_size 524 525 if self._compress_type == ZIP_DEFLATED: 526 self._decompressor = zlib.decompressobj(-15) 527 elif self._compress_type != ZIP_STORED: 528 descr = compressor_names.get(self._compress_type) 529 if descr: 530 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr)) 531 else: 532 raise NotImplementedError("compression type %d" % (self._compress_type,)) 533 self._unconsumed = '' 534 535 self._readbuffer = '' 536 self._offset = 0 537 538 self._universal = 'U' in mode 539 self.newlines = None 540 541 # Adjust read size for encrypted files since the first 12 bytes 542 # are for the encryption/password information. 543 if self._decrypter is not None: 544 self._compress_left -= 12 545 546 self.mode = mode 547 self.name = zipinfo.filename 548 549 if hasattr(zipinfo, 'CRC'): 550 self._expected_crc = zipinfo.CRC 551 self._running_crc = crc32(b'') & 0xffffffff 552 else: 553 self._expected_crc = None 554 555 def readline(self, limit=-1): 556 """Read and return a line from the stream. 557 558 If limit is specified, at most limit bytes will be read. 559 """ 560 561 if not self._universal and limit < 0: 562 # Shortcut common case - newline found in buffer. 563 i = self._readbuffer.find('\n', self._offset) + 1 564 if i > 0: 565 line = self._readbuffer[self._offset: i] 566 self._offset = i 567 return line 568 569 if not self._universal: 570 return io.BufferedIOBase.readline(self, limit) 571 572 line = '' 573 while limit < 0 or len(line) < limit: 574 readahead = self.peek(2) 575 if readahead == '': 576 return line 577 578 # 579 # Search for universal newlines or line chunks. 580 # 581 # The pattern returns either a line chunk or a newline, but not 582 # both. Combined with peek(2), we are assured that the sequence 583 # '\r\n' is always retrieved completely and never split into 584 # separate newlines - '\r', '\n' due to coincidental readaheads. 585 # 586 match = self.PATTERN.search(readahead) 587 newline = match.group('newline') 588 if newline is not None: 589 if self.newlines is None: 590 self.newlines = [] 591 if newline not in self.newlines: 592 self.newlines.append(newline) 593 self._offset += len(newline) 594 return line + '\n' 595 596 chunk = match.group('chunk') 597 if limit >= 0: 598 chunk = chunk[: limit - len(line)] 599 600 self._offset += len(chunk) 601 line += chunk 602 603 return line 604 605 def peek(self, n=1): 606 """Returns buffered bytes without advancing the position.""" 607 if n > len(self._readbuffer) - self._offset: 608 chunk = self.read(n) 609 self._offset -= len(chunk) 610 611 # Return up to 512 bytes to reduce allocation overhead for tight loops. 612 return self._readbuffer[self._offset: self._offset + 512] 613 614 def readable(self): 615 return True 616 617 def read(self, n=-1): 618 """Read and return up to n bytes. 619 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. 620 """ 621 buf = '' 622 if n is None: 623 n = -1 624 while True: 625 if n < 0: 626 data = self.read1(n) 627 elif n > len(buf): 628 data = self.read1(n - len(buf)) 629 else: 630 return buf 631 if len(data) == 0: 632 return buf 633 buf += data 634 635 def _update_crc(self, newdata, eof): 636 # Update the CRC using the given data. 637 if self._expected_crc is None: 638 # No need to compute the CRC if we don't have a reference value 639 return 640 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff 641 # Check the CRC if we're at the end of the file 642 if eof and self._running_crc != self._expected_crc: 643 raise BadZipfile("Bad CRC-32 for file %r" % self.name) 644 645 def read1(self, n): 646 """Read up to n bytes with at most one read() system call.""" 647 648 # Simplify algorithm (branching) by transforming negative n to large n. 649 if n < 0 or n is None: 650 n = self.MAX_N 651 652 # Bytes available in read buffer. 653 len_readbuffer = len(self._readbuffer) - self._offset 654 655 # Read from file. 656 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): 657 nbytes = n - len_readbuffer - len(self._unconsumed) 658 nbytes = max(nbytes, self.MIN_READ_SIZE) 659 nbytes = min(nbytes, self._compress_left) 660 661 data = self._fileobj.read(nbytes) 662 self._compress_left -= len(data) 663 664 if data and self._decrypter is not None: 665 data = ''.join(map(self._decrypter, data)) 666 667 if self._compress_type == ZIP_STORED: 668 self._update_crc(data, eof=(self._compress_left==0)) 669 self._readbuffer = self._readbuffer[self._offset:] + data 670 self._offset = 0 671 else: 672 # Prepare deflated bytes for decompression. 673 self._unconsumed += data 674 675 # Handle unconsumed data. 676 if (len(self._unconsumed) > 0 and n > len_readbuffer and 677 self._compress_type == ZIP_DEFLATED): 678 data = self._decompressor.decompress( 679 self._unconsumed, 680 max(n - len_readbuffer, self.MIN_READ_SIZE) 681 ) 682 683 self._unconsumed = self._decompressor.unconsumed_tail 684 eof = len(self._unconsumed) == 0 and self._compress_left == 0 685 if eof: 686 data += self._decompressor.flush() 687 688 self._update_crc(data, eof=eof) 689 self._readbuffer = self._readbuffer[self._offset:] + data 690 self._offset = 0 691 692 # Read from buffer. 693 data = self._readbuffer[self._offset: self._offset + n] 694 self._offset += len(data) 695 return data 696 697 def close(self): 698 try : 699 if self._close_fileobj: 700 self._fileobj.close() 701 finally: 702 super(ZipExtFile, self).close() 703 704 705 class ZipFile(object): 706 """ Class with methods to open, read, write, close, list zip files. 707 708 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) 709 710 file: Either the path to the file, or a file-like object. 711 If it is a path, the file will be opened and closed by ZipFile. 712 mode: The mode can be either read "r", write "w" or append "a". 713 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). 714 allowZip64: if True ZipFile will create files with ZIP64 extensions when 715 needed, otherwise it will raise an exception when this would 716 be necessary. 717 718 """ 719 720 fp = None # Set here since __del__ checks it 721 722 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): 723 """Open the ZIP file with mode read "r", write "w" or append "a".""" 724 if mode not in ("r", "w", "a"): 725 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') 726 727 if compression == ZIP_STORED: 728 pass 729 elif compression == ZIP_DEFLATED: 730 if not zlib: 731 raise RuntimeError,\ 732 "Compression requires the (missing) zlib module" 733 else: 734 raise RuntimeError, "That compression method is not supported" 735 736 self._allowZip64 = allowZip64 737 self._didModify = False 738 self.debug = 0 # Level of printing: 0 through 3 739 self.NameToInfo = {} # Find file info given name 740 self.filelist = [] # List of ZipInfo instances for archive 741 self.compression = compression # Method of compression 742 self.mode = key = mode.replace('b', '')[0] 743 self.pwd = None 744 self._comment = '' 745 746 # Check if we were passed a file-like object 747 if isinstance(file, basestring): 748 self._filePassed = 0 749 self.filename = file 750 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} 751 try: 752 self.fp = open(file, modeDict[mode]) 753 except IOError: 754 if mode == 'a': 755 mode = key = 'w' 756 self.fp = open(file, modeDict[mode]) 757 else: 758 raise 759 else: 760 self._filePassed = 1 761 self.fp = file 762 self.filename = getattr(file, 'name', None) 763 764 try: 765 if key == 'r': 766 self._RealGetContents() 767 elif key == 'w': 768 # set the modified flag so central directory gets written 769 # even if no files are added to the archive 770 self._didModify = True 771 elif key == 'a': 772 try: 773 # See if file is a zip file 774 self._RealGetContents() 775 # seek to start of directory and overwrite 776 self.fp.seek(self.start_dir, 0) 777 except BadZipfile: 778 # file is not a zip file, just append 779 self.fp.seek(0, 2) 780 781 # set the modified flag so central directory gets written 782 # even if no files are added to the archive 783 self._didModify = True 784 else: 785 raise RuntimeError('Mode must be "r", "w" or "a"') 786 except: 787 fp = self.fp 788 self.fp = None 789 if not self._filePassed: 790 fp.close() 791 raise 792 793 def __enter__(self): 794 return self 795 796 def __exit__(self, type, value, traceback): 797 self.close() 798 799 def _RealGetContents(self): 800 """Read in the table of contents for the ZIP file.""" 801 fp = self.fp 802 try: 803 endrec = _EndRecData(fp) 804 except IOError: 805 raise BadZipfile("File is not a zip file") 806 if not endrec: 807 raise BadZipfile, "File is not a zip file" 808 if self.debug > 1: 809 print endrec 810 size_cd = endrec[_ECD_SIZE] # bytes in central directory 811 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 812 self._comment = endrec[_ECD_COMMENT] # archive comment 813 814 # "concat" is zero, unless zip was concatenated to another file 815 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 816 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 817 # If Zip64 extension structures are present, account for them 818 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 819 820 if self.debug > 2: 821 inferred = concat + offset_cd 822 print "given, inferred, offset", offset_cd, inferred, concat 823 # self.start_dir: Position of start of central directory 824 self.start_dir = offset_cd + concat 825 fp.seek(self.start_dir, 0) 826 data = fp.read(size_cd) 827 fp = cStringIO.StringIO(data) 828 total = 0 829 while total < size_cd: 830 centdir = fp.read(sizeCentralDir) 831 if len(centdir) != sizeCentralDir: 832 raise BadZipfile("Truncated central directory") 833 centdir = struct.unpack(structCentralDir, centdir) 834 if centdir[_CD_SIGNATURE] != stringCentralDir: 835 raise BadZipfile("Bad magic number for central directory") 836 if self.debug > 2: 837 print centdir 838 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 839 # Create ZipInfo instance to store file information 840 x = ZipInfo(filename) 841 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 842 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 843 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 844 (x.create_version, x.create_system, x.extract_version, x.reserved, 845 x.flag_bits, x.compress_type, t, d, 846 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 847 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 848 # Convert date/time code to (year, month, day, hour, min, sec) 849 x._raw_time = t 850 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 851 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 852 853 x._decodeExtra() 854 x.header_offset = x.header_offset + concat 855 x.filename = x._decodeFilename() 856 self.filelist.append(x) 857 self.NameToInfo[x.filename] = x 858 859 # update total bytes read from central directory 860 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 861 + centdir[_CD_EXTRA_FIELD_LENGTH] 862 + centdir[_CD_COMMENT_LENGTH]) 863 864 if self.debug > 2: 865 print "total", total 866 867 868 def namelist(self): 869 """Return a list of file names in the archive.""" 870 l = [] 871 for data in self.filelist: 872 l.append(data.filename) 873 return l 874 875 def infolist(self): 876 """Return a list of class ZipInfo instances for files in the 877 archive.""" 878 return self.filelist 879 880 def printdir(self): 881 """Print a table of contents for the zip file.""" 882 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size") 883 for zinfo in self.filelist: 884 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 885 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) 886 887 def testzip(self): 888 """Read all the files and check the CRC.""" 889 chunk_size = 2 ** 20 890 for zinfo in self.filelist: 891 try: 892 # Read by chunks, to avoid an OverflowError or a 893 # MemoryError with very large embedded files. 894 with self.open(zinfo.filename, "r") as f: 895 while f.read(chunk_size): # Check CRC-32 896 pass 897 except BadZipfile: 898 return zinfo.filename 899 900 def getinfo(self, name): 901 """Return the instance of ZipInfo given 'name'.""" 902 info = self.NameToInfo.get(name) 903 if info is None: 904 raise KeyError( 905 'There is no item named %r in the archive' % name) 906 907 return info 908 909 def setpassword(self, pwd): 910 """Set default password for encrypted files.""" 911 self.pwd = pwd 912 913 @property 914 def comment(self): 915 """The comment text associated with the ZIP file.""" 916 return self._comment 917 918 @comment.setter 919 def comment(self, comment): 920 # check for valid comment length 921 if len(comment) >= ZIP_MAX_COMMENT: 922 if self.debug: 923 print('Archive comment is too long; truncating to %d bytes' 924 % ZIP_MAX_COMMENT) 925 comment = comment[:ZIP_MAX_COMMENT] 926 self._comment = comment 927 self._didModify = True 928 929 def read(self, name, pwd=None): 930 """Return file bytes (as a string) for name.""" 931 return self.open(name, "r", pwd).read() 932 933 def open(self, name, mode="r", pwd=None): 934 """Return file-like object for 'name'.""" 935 if mode not in ("r", "U", "rU"): 936 raise RuntimeError, 'open() requires mode "r", "U", or "rU"' 937 if not self.fp: 938 raise RuntimeError, \ 939 "Attempt to read ZIP archive that was already closed" 940 941 # Only open a new file for instances where we were not 942 # given a file object in the constructor 943 if self._filePassed: 944 zef_file = self.fp 945 should_close = False 946 else: 947 zef_file = open(self.filename, 'rb') 948 should_close = True 949 950 try: 951 # Make sure we have an info object 952 if isinstance(name, ZipInfo): 953 # 'name' is already an info object 954 zinfo = name 955 else: 956 # Get info object for name 957 zinfo = self.getinfo(name) 958 959 zef_file.seek(zinfo.header_offset, 0) 960 961 # Skip the file header: 962 fheader = zef_file.read(sizeFileHeader) 963 if len(fheader) != sizeFileHeader: 964 raise BadZipfile("Truncated file header") 965 fheader = struct.unpack(structFileHeader, fheader) 966 if fheader[_FH_SIGNATURE] != stringFileHeader: 967 raise BadZipfile("Bad magic number for file header") 968 969 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 970 if fheader[_FH_EXTRA_FIELD_LENGTH]: 971 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 972 973 if fname != zinfo.orig_filename: 974 raise BadZipfile, \ 975 'File name in directory "%s" and header "%s" differ.' % ( 976 zinfo.orig_filename, fname) 977 978 # check for encrypted flag & handle password 979 is_encrypted = zinfo.flag_bits & 0x1 980 zd = None 981 if is_encrypted: 982 if not pwd: 983 pwd = self.pwd 984 if not pwd: 985 raise RuntimeError, "File %s is encrypted, " \ 986 "password required for extraction" % name 987 988 zd = _ZipDecrypter(pwd) 989 # The first 12 bytes in the cypher stream is an encryption header 990 # used to strengthen the algorithm. The first 11 bytes are 991 # completely random, while the 12th contains the MSB of the CRC, 992 # or the MSB of the file time depending on the header type 993 # and is used to check the correctness of the password. 994 bytes = zef_file.read(12) 995 h = map(zd, bytes[0:12]) 996 if zinfo.flag_bits & 0x8: 997 # compare against the file type from extended local headers 998 check_byte = (zinfo._raw_time >> 8) & 0xff 999 else: 1000 # compare against the CRC otherwise 1001 check_byte = (zinfo.CRC >> 24) & 0xff 1002 if ord(h[11]) != check_byte: 1003 raise RuntimeError("Bad password for file", name) 1004 1005 return ZipExtFile(zef_file, mode, zinfo, zd, 1006 close_fileobj=should_close) 1007 except: 1008 if should_close: 1009 zef_file.close() 1010 raise 1011 1012 def extract(self, member, path=None, pwd=None): 1013 """Extract a member from the archive to the current working directory, 1014 using its full name. Its file information is extracted as accurately 1015 as possible. `member' may be a filename or a ZipInfo object. You can 1016 specify a different directory using `path'. 1017 """ 1018 if not isinstance(member, ZipInfo): 1019 member = self.getinfo(member) 1020 1021 if path is None: 1022 path = os.getcwd() 1023 1024 return self._extract_member(member, path, pwd) 1025 1026 def extractall(self, path=None, members=None, pwd=None): 1027 """Extract all members from the archive to the current working 1028 directory. `path' specifies a different directory to extract to. 1029 `members' is optional and must be a subset of the list returned 1030 by namelist(). 1031 """ 1032 if members is None: 1033 members = self.namelist() 1034 1035 for zipinfo in members: 1036 self.extract(zipinfo, path, pwd) 1037 1038 def _extract_member(self, member, targetpath, pwd): 1039 """Extract the ZipInfo object 'member' to a physical 1040 file on the path targetpath. 1041 """ 1042 # build the destination pathname, replacing 1043 # forward slashes to platform specific separators. 1044 arcname = member.filename.replace('/', os.path.sep) 1045 1046 if os.path.altsep: 1047 arcname = arcname.replace(os.path.altsep, os.path.sep) 1048 # interpret absolute pathname as relative, remove drive letter or 1049 # UNC path, redundant separators, "." and ".." components. 1050 arcname = os.path.splitdrive(arcname)[1] 1051 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1052 if x not in ('', os.path.curdir, os.path.pardir)) 1053 if os.path.sep == '\\': 1054 # filter illegal characters on Windows 1055 illegal = ':<>|"?*' 1056 if isinstance(arcname, unicode): 1057 table = {ord(c): ord('_') for c in illegal} 1058 else: 1059 table = string.maketrans(illegal, '_' * len(illegal)) 1060 arcname = arcname.translate(table) 1061 # remove trailing dots 1062 arcname = (x.rstrip('.') for x in arcname.split(os.path.sep)) 1063 arcname = os.path.sep.join(x for x in arcname if x) 1064 1065 targetpath = os.path.join(targetpath, arcname) 1066 targetpath = os.path.normpath(targetpath) 1067 1068 # Create all upper directories if necessary. 1069 upperdirs = os.path.dirname(targetpath) 1070 if upperdirs and not os.path.exists(upperdirs): 1071 os.makedirs(upperdirs) 1072 1073 if member.filename[-1] == '/': 1074 if not os.path.isdir(targetpath): 1075 os.mkdir(targetpath) 1076 return targetpath 1077 1078 with self.open(member, pwd=pwd) as source, \ 1079 file(targetpath, "wb") as target: 1080 shutil.copyfileobj(source, target) 1081 1082 return targetpath 1083 1084 def _writecheck(self, zinfo): 1085 """Check for errors before writing a file to the archive.""" 1086 if zinfo.filename in self.NameToInfo: 1087 if self.debug: # Warning for duplicate names 1088 print "Duplicate name:", zinfo.filename 1089 if self.mode not in ("w", "a"): 1090 raise RuntimeError, 'write() requires mode "w" or "a"' 1091 if not self.fp: 1092 raise RuntimeError, \ 1093 "Attempt to write ZIP archive that was already closed" 1094 if zinfo.compress_type == ZIP_DEFLATED and not zlib: 1095 raise RuntimeError, \ 1096 "Compression requires the (missing) zlib module" 1097 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): 1098 raise RuntimeError, \ 1099 "That compression method is not supported" 1100 if zinfo.file_size > ZIP64_LIMIT: 1101 if not self._allowZip64: 1102 raise LargeZipFile("Filesize would require ZIP64 extensions") 1103 if zinfo.header_offset > ZIP64_LIMIT: 1104 if not self._allowZip64: 1105 raise LargeZipFile("Zipfile size would require ZIP64 extensions") 1106 1107 def write(self, filename, arcname=None, compress_type=None): 1108 """Put the bytes from filename into the archive under the name 1109 arcname.""" 1110 if not self.fp: 1111 raise RuntimeError( 1112 "Attempt to write to ZIP archive that was already closed") 1113 1114 st = os.stat(filename) 1115 isdir = stat.S_ISDIR(st.st_mode) 1116 mtime = time.localtime(st.st_mtime) 1117 date_time = mtime[0:6] 1118 # Create ZipInfo instance to store file information 1119 if arcname is None: 1120 arcname = filename 1121 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 1122 while arcname[0] in (os.sep, os.altsep): 1123 arcname = arcname[1:] 1124 if isdir: 1125 arcname += '/' 1126 zinfo = ZipInfo(arcname, date_time) 1127 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes 1128 if compress_type is None: 1129 zinfo.compress_type = self.compression 1130 else: 1131 zinfo.compress_type = compress_type 1132 1133 zinfo.file_size = st.st_size 1134 zinfo.flag_bits = 0x00 1135 zinfo.header_offset = self.fp.tell() # Start of header bytes 1136 1137 self._writecheck(zinfo) 1138 self._didModify = True 1139 1140 if isdir: 1141 zinfo.file_size = 0 1142 zinfo.compress_size = 0 1143 zinfo.CRC = 0 1144 self.filelist.append(zinfo) 1145 self.NameToInfo[zinfo.filename] = zinfo 1146 self.fp.write(zinfo.FileHeader(False)) 1147 return 1148 1149 with open(filename, "rb") as fp: 1150 # Must overwrite CRC and sizes with correct data later 1151 zinfo.CRC = CRC = 0 1152 zinfo.compress_size = compress_size = 0 1153 # Compressed size can be larger than uncompressed size 1154 zip64 = self._allowZip64 and \ 1155 zinfo.file_size * 1.05 > ZIP64_LIMIT 1156 self.fp.write(zinfo.FileHeader(zip64)) 1157 if zinfo.compress_type == ZIP_DEFLATED: 1158 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 1159 zlib.DEFLATED, -15) 1160 else: 1161 cmpr = None 1162 file_size = 0 1163 while 1: 1164 buf = fp.read(1024 * 8) 1165 if not buf: 1166 break 1167 file_size = file_size + len(buf) 1168 CRC = crc32(buf, CRC) & 0xffffffff 1169 if cmpr: 1170 buf = cmpr.compress(buf) 1171 compress_size = compress_size + len(buf) 1172 self.fp.write(buf) 1173 if cmpr: 1174 buf = cmpr.flush() 1175 compress_size = compress_size + len(buf) 1176 self.fp.write(buf) 1177 zinfo.compress_size = compress_size 1178 else: 1179 zinfo.compress_size = file_size 1180 zinfo.CRC = CRC 1181 zinfo.file_size = file_size 1182 if not zip64 and self._allowZip64: 1183 if file_size > ZIP64_LIMIT: 1184 raise RuntimeError('File size has increased during compressing') 1185 if compress_size > ZIP64_LIMIT: 1186 raise RuntimeError('Compressed size larger than uncompressed size') 1187 # Seek backwards and write file header (which will now include 1188 # correct CRC and file sizes) 1189 position = self.fp.tell() # Preserve current position in file 1190 self.fp.seek(zinfo.header_offset, 0) 1191 self.fp.write(zinfo.FileHeader(zip64)) 1192 self.fp.seek(position, 0) 1193 self.filelist.append(zinfo) 1194 self.NameToInfo[zinfo.filename] = zinfo 1195 1196 def writestr(self, zinfo_or_arcname, bytes, compress_type=None): 1197 """Write a file into the archive. The contents is the string 1198 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or 1199 the name of the file in the archive.""" 1200 if not isinstance(zinfo_or_arcname, ZipInfo): 1201 zinfo = ZipInfo(filename=zinfo_or_arcname, 1202 date_time=time.localtime(time.time())[:6]) 1203 1204 zinfo.compress_type = self.compression 1205 zinfo.external_attr = 0600 << 16 1206 else: 1207 zinfo = zinfo_or_arcname 1208 1209 if not self.fp: 1210 raise RuntimeError( 1211 "Attempt to write to ZIP archive that was already closed") 1212 1213 if compress_type is not None: 1214 zinfo.compress_type = compress_type 1215 1216 zinfo.file_size = len(bytes) # Uncompressed size 1217 zinfo.header_offset = self.fp.tell() # Start of header bytes 1218 self._writecheck(zinfo) 1219 self._didModify = True 1220 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum 1221 if zinfo.compress_type == ZIP_DEFLATED: 1222 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 1223 zlib.DEFLATED, -15) 1224 bytes = co.compress(bytes) + co.flush() 1225 zinfo.compress_size = len(bytes) # Compressed size 1226 else: 1227 zinfo.compress_size = zinfo.file_size 1228 zip64 = zinfo.file_size > ZIP64_LIMIT or \ 1229 zinfo.compress_size > ZIP64_LIMIT 1230 if zip64 and not self._allowZip64: 1231 raise LargeZipFile("Filesize would require ZIP64 extensions") 1232 self.fp.write(zinfo.FileHeader(zip64)) 1233 self.fp.write(bytes) 1234 if zinfo.flag_bits & 0x08: 1235 # Write CRC and file sizes after the file data 1236 fmt = '<LQQ' if zip64 else '<LLL' 1237 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, 1238 zinfo.file_size)) 1239 self.fp.flush() 1240 self.filelist.append(zinfo) 1241 self.NameToInfo[zinfo.filename] = zinfo 1242 1243 def __del__(self): 1244 """Call the "close()" method in case the user forgot.""" 1245 self.close() 1246 1247 def close(self): 1248 """Close the file, and for mode "w" and "a" write the ending 1249 records.""" 1250 if self.fp is None: 1251 return 1252 1253 try: 1254 if self.mode in ("w", "a") and self._didModify: # write ending records 1255 count = 0 1256 pos1 = self.fp.tell() 1257 for zinfo in self.filelist: # write central directory 1258 count = count + 1 1259 dt = zinfo.date_time 1260 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1261 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1262 extra = [] 1263 if zinfo.file_size > ZIP64_LIMIT \ 1264 or zinfo.compress_size > ZIP64_LIMIT: 1265 extra.append(zinfo.file_size) 1266 extra.append(zinfo.compress_size) 1267 file_size = 0xffffffff 1268 compress_size = 0xffffffff 1269 else: 1270 file_size = zinfo.file_size 1271 compress_size = zinfo.compress_size 1272 1273 if zinfo.header_offset > ZIP64_LIMIT: 1274 extra.append(zinfo.header_offset) 1275 header_offset = 0xffffffffL 1276 else: 1277 header_offset = zinfo.header_offset 1278 1279 extra_data = zinfo.extra 1280 if extra: 1281 # Append a ZIP64 field to the extra's 1282 extra_data = struct.pack( 1283 '<HH' + 'Q'*len(extra), 1284 1, 8*len(extra), *extra) + extra_data 1285 1286 extract_version = max(45, zinfo.extract_version) 1287 create_version = max(45, zinfo.create_version) 1288 else: 1289 extract_version = zinfo.extract_version 1290 create_version = zinfo.create_version 1291 1292 try: 1293 filename, flag_bits = zinfo._encodeFilenameFlags() 1294 centdir = struct.pack(structCentralDir, 1295 stringCentralDir, create_version, 1296 zinfo.create_system, extract_version, zinfo.reserved, 1297 flag_bits, zinfo.compress_type, dostime, dosdate, 1298 zinfo.CRC, compress_size, file_size, 1299 len(filename), len(extra_data), len(zinfo.comment), 1300 0, zinfo.internal_attr, zinfo.external_attr, 1301 header_offset) 1302 except DeprecationWarning: 1303 print >>sys.stderr, (structCentralDir, 1304 stringCentralDir, create_version, 1305 zinfo.create_system, extract_version, zinfo.reserved, 1306 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1307 zinfo.CRC, compress_size, file_size, 1308 len(zinfo.filename), len(extra_data), len(zinfo.comment), 1309 0, zinfo.internal_attr, zinfo.external_attr, 1310 header_offset) 1311 raise 1312 self.fp.write(centdir) 1313 self.fp.write(filename) 1314 self.fp.write(extra_data) 1315 self.fp.write(zinfo.comment) 1316 1317 pos2 = self.fp.tell() 1318 # Write end-of-zip-archive record 1319 centDirCount = count 1320 centDirSize = pos2 - pos1 1321 centDirOffset = pos1 1322 if (centDirCount >= ZIP_FILECOUNT_LIMIT or 1323 centDirOffset > ZIP64_LIMIT or 1324 centDirSize > ZIP64_LIMIT): 1325 # Need to write the ZIP64 end-of-archive records 1326 zip64endrec = struct.pack( 1327 structEndArchive64, stringEndArchive64, 1328 44, 45, 45, 0, 0, centDirCount, centDirCount, 1329 centDirSize, centDirOffset) 1330 self.fp.write(zip64endrec) 1331 1332 zip64locrec = struct.pack( 1333 structEndArchive64Locator, 1334 stringEndArchive64Locator, 0, pos2, 1) 1335 self.fp.write(zip64locrec) 1336 centDirCount = min(centDirCount, 0xFFFF) 1337 centDirSize = min(centDirSize, 0xFFFFFFFF) 1338 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1339 1340 endrec = struct.pack(structEndArchive, stringEndArchive, 1341 0, 0, centDirCount, centDirCount, 1342 centDirSize, centDirOffset, len(self._comment)) 1343 self.fp.write(endrec) 1344 self.fp.write(self._comment) 1345 self.fp.flush() 1346 finally: 1347 fp = self.fp 1348 self.fp = None 1349 if not self._filePassed: 1350 fp.close() 1351 1352 1353 class PyZipFile(ZipFile): 1354 """Class to create ZIP archives with Python library files and packages.""" 1355 1356 def writepy(self, pathname, basename = ""): 1357 """Add all files from "pathname" to the ZIP archive. 1358 1359 If pathname is a package directory, search the directory and 1360 all package subdirectories recursively for all *.py and enter 1361 the modules into the archive. If pathname is a plain 1362 directory, listdir *.py and enter all modules. Else, pathname 1363 must be a Python *.py file and the module will be put into the 1364 archive. Added modules are always module.pyo or module.pyc. 1365 This method will compile the module.py into module.pyc if 1366 necessary. 1367 """ 1368 dir, name = os.path.split(pathname) 1369 if os.path.isdir(pathname): 1370 initname = os.path.join(pathname, "__init__.py") 1371 if os.path.isfile(initname): 1372 # This is a package directory, add it 1373 if basename: 1374 basename = "%s/%s" % (basename, name) 1375 else: 1376 basename = name 1377 if self.debug: 1378 print "Adding package in", pathname, "as", basename 1379 fname, arcname = self._get_codename(initname[0:-3], basename) 1380 if self.debug: 1381 print "Adding", arcname 1382 self.write(fname, arcname) 1383 dirlist = os.listdir(pathname) 1384 dirlist.remove("__init__.py") 1385 # Add all *.py files and package subdirectories 1386 for filename in dirlist: 1387 path = os.path.join(pathname, filename) 1388 root, ext = os.path.splitext(filename) 1389 if os.path.isdir(path): 1390 if os.path.isfile(os.path.join(path, "__init__.py")): 1391 # This is a package directory, add it 1392 self.writepy(path, basename) # Recursive call 1393 elif ext == ".py": 1394 fname, arcname = self._get_codename(path[0:-3], 1395 basename) 1396 if self.debug: 1397 print "Adding", arcname 1398 self.write(fname, arcname) 1399 else: 1400 # This is NOT a package directory, add its files at top level 1401 if self.debug: 1402 print "Adding files from directory", pathname 1403 for filename in os.listdir(pathname): 1404 path = os.path.join(pathname, filename) 1405 root, ext = os.path.splitext(filename) 1406 if ext == ".py": 1407 fname, arcname = self._get_codename(path[0:-3], 1408 basename) 1409 if self.debug: 1410 print "Adding", arcname 1411 self.write(fname, arcname) 1412 else: 1413 if pathname[-3:] != ".py": 1414 raise RuntimeError, \ 1415 'Files added with writepy() must end with ".py"' 1416 fname, arcname = self._get_codename(pathname[0:-3], basename) 1417 if self.debug: 1418 print "Adding file", arcname 1419 self.write(fname, arcname) 1420 1421 def _get_codename(self, pathname, basename): 1422 """Return (filename, archivename) for the path. 1423 1424 Given a module name path, return the correct file path and 1425 archive name, compiling if necessary. For example, given 1426 /python/lib/string, return (/python/lib/string.pyc, string). 1427 """ 1428 file_py = pathname + ".py" 1429 file_pyc = pathname + ".pyc" 1430 file_pyo = pathname + ".pyo" 1431 if os.path.isfile(file_pyo) and \ 1432 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: 1433 fname = file_pyo # Use .pyo file 1434 elif not os.path.isfile(file_pyc) or \ 1435 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: 1436 import py_compile 1437 if self.debug: 1438 print "Compiling", file_py 1439 try: 1440 py_compile.compile(file_py, file_pyc, None, True) 1441 except py_compile.PyCompileError,err: 1442 print err.msg 1443 fname = file_pyc 1444 else: 1445 fname = file_pyc 1446 archivename = os.path.split(fname)[1] 1447 if basename: 1448 archivename = "%s/%s" % (basename, archivename) 1449 return (fname, archivename) 1450 1451 1452 def main(args = None): 1453 import textwrap 1454 USAGE=textwrap.dedent("""\ 1455 Usage: 1456 zipfile.py -l zipfile.zip # Show listing of a zipfile 1457 zipfile.py -t zipfile.zip # Test if a zipfile is valid 1458 zipfile.py -e zipfile.zip target # Extract zipfile into target dir 1459 zipfile.py -c zipfile.zip src ... # Create zipfile from sources 1460 """) 1461 if args is None: 1462 args = sys.argv[1:] 1463 1464 if not args or args[0] not in ('-l', '-c', '-e', '-t'): 1465 print USAGE 1466 sys.exit(1) 1467 1468 if args[0] == '-l': 1469 if len(args) != 2: 1470 print USAGE 1471 sys.exit(1) 1472 with ZipFile(args[1], 'r') as zf: 1473 zf.printdir() 1474 1475 elif args[0] == '-t': 1476 if len(args) != 2: 1477 print USAGE 1478 sys.exit(1) 1479 with ZipFile(args[1], 'r') as zf: 1480 badfile = zf.testzip() 1481 if badfile: 1482 print("The following enclosed file is corrupted: {!r}".format(badfile)) 1483 print "Done testing" 1484 1485 elif args[0] == '-e': 1486 if len(args) != 3: 1487 print USAGE 1488 sys.exit(1) 1489 1490 with ZipFile(args[1], 'r') as zf: 1491 out = args[2] 1492 for path in zf.namelist(): 1493 if path.startswith('./'): 1494 tgt = os.path.join(out, path[2:]) 1495 else: 1496 tgt = os.path.join(out, path) 1497 1498 tgtdir = os.path.dirname(tgt) 1499 if not os.path.exists(tgtdir): 1500 os.makedirs(tgtdir) 1501 with open(tgt, 'wb') as fp: 1502 fp.write(zf.read(path)) 1503 1504 elif args[0] == '-c': 1505 if len(args) < 3: 1506 print USAGE 1507 sys.exit(1) 1508 1509 def addToZip(zf, path, zippath): 1510 if os.path.isfile(path): 1511 zf.write(path, zippath, ZIP_DEFLATED) 1512 elif os.path.isdir(path): 1513 for nm in os.listdir(path): 1514 addToZip(zf, 1515 os.path.join(path, nm), os.path.join(zippath, nm)) 1516 # else: ignore 1517 1518 with ZipFile(args[1], 'w', allowZip64=True) as zf: 1519 for src in args[2:]: 1520 addToZip(zf, src, os.path.basename(src)) 1521 1522 if __name__ == "__main__": 1523 main() 1524