Home | History | Annotate | Download | only in python2.7
      1 """
      2 Read and write ZIP files.
      3 """
      4 import struct, os, time, sys, shutil
      5 import binascii, cStringIO, stat
      6 import io
      7 import re
      8 import string
      9 
     10 try:
     11     import zlib # We may need its compression method
     12     crc32 = zlib.crc32
     13 except ImportError:
     14     zlib = None
     15     crc32 = binascii.crc32
     16 
     17 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
     18            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
     19 
     20 class BadZipfile(Exception):
     21     pass
     22 
     23 
     24 class LargeZipFile(Exception):
     25     """
     26     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
     27     and those extensions are disabled.
     28     """
     29 
     30 error = BadZipfile      # The exception raised by this module
     31 
     32 ZIP64_LIMIT = (1 << 31) - 1
     33 ZIP_FILECOUNT_LIMIT = 1 << 16
     34 ZIP_MAX_COMMENT = (1 << 16) - 1
     35 
     36 # constants for Zip file compression methods
     37 ZIP_STORED = 0
     38 ZIP_DEFLATED = 8
     39 # Other ZIP compression methods not supported
     40 
     41 # Below are some formats and associated data for reading/writing headers using
     42 # the struct module.  The names and structures of headers/records are those used
     43 # in the PKWARE description of the ZIP file format:
     44 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
     45 # (URL valid as of January 2008)
     46 
     47 # The "end of central directory" structure, magic number, size, and indices
     48 # (section V.I in the format document)
     49 structEndArchive = "<4s4H2LH"
     50 stringEndArchive = "PK\005\006"
     51 sizeEndCentDir = struct.calcsize(structEndArchive)
     52 
     53 _ECD_SIGNATURE = 0
     54 _ECD_DISK_NUMBER = 1
     55 _ECD_DISK_START = 2
     56 _ECD_ENTRIES_THIS_DISK = 3
     57 _ECD_ENTRIES_TOTAL = 4
     58 _ECD_SIZE = 5
     59 _ECD_OFFSET = 6
     60 _ECD_COMMENT_SIZE = 7
     61 # These last two indices are not part of the structure as defined in the
     62 # spec, but they are used internally by this module as a convenience
     63 _ECD_COMMENT = 8
     64 _ECD_LOCATION = 9
     65 
     66 # The "central directory" structure, magic number, size, and indices
     67 # of entries in the structure (section V.F in the format document)
     68 structCentralDir = "<4s4B4HL2L5H2L"
     69 stringCentralDir = "PK\001\002"
     70 sizeCentralDir = struct.calcsize(structCentralDir)
     71 
     72 # indexes of entries in the central directory structure
     73 _CD_SIGNATURE = 0
     74 _CD_CREATE_VERSION = 1
     75 _CD_CREATE_SYSTEM = 2
     76 _CD_EXTRACT_VERSION = 3
     77 _CD_EXTRACT_SYSTEM = 4
     78 _CD_FLAG_BITS = 5
     79 _CD_COMPRESS_TYPE = 6
     80 _CD_TIME = 7
     81 _CD_DATE = 8
     82 _CD_CRC = 9
     83 _CD_COMPRESSED_SIZE = 10
     84 _CD_UNCOMPRESSED_SIZE = 11
     85 _CD_FILENAME_LENGTH = 12
     86 _CD_EXTRA_FIELD_LENGTH = 13
     87 _CD_COMMENT_LENGTH = 14
     88 _CD_DISK_NUMBER_START = 15
     89 _CD_INTERNAL_FILE_ATTRIBUTES = 16
     90 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
     91 _CD_LOCAL_HEADER_OFFSET = 18
     92 
     93 # The "local file header" structure, magic number, size, and indices
     94 # (section V.A in the format document)
     95 structFileHeader = "<4s2B4HL2L2H"
     96 stringFileHeader = "PK\003\004"
     97 sizeFileHeader = struct.calcsize(structFileHeader)
     98 
     99 _FH_SIGNATURE = 0
    100 _FH_EXTRACT_VERSION = 1
    101 _FH_EXTRACT_SYSTEM = 2
    102 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
    103 _FH_COMPRESSION_METHOD = 4
    104 _FH_LAST_MOD_TIME = 5
    105 _FH_LAST_MOD_DATE = 6
    106 _FH_CRC = 7
    107 _FH_COMPRESSED_SIZE = 8
    108 _FH_UNCOMPRESSED_SIZE = 9
    109 _FH_FILENAME_LENGTH = 10
    110 _FH_EXTRA_FIELD_LENGTH = 11
    111 
    112 # The "Zip64 end of central directory locator" structure, magic number, and size
    113 structEndArchive64Locator = "<4sLQL"
    114 stringEndArchive64Locator = "PK\x06\x07"
    115 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
    116 
    117 # The "Zip64 end of central directory" record, magic number, size, and indices
    118 # (section V.G in the format document)
    119 structEndArchive64 = "<4sQ2H2L4Q"
    120 stringEndArchive64 = "PK\x06\x06"
    121 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
    122 
    123 _CD64_SIGNATURE = 0
    124 _CD64_DIRECTORY_RECSIZE = 1
    125 _CD64_CREATE_VERSION = 2
    126 _CD64_EXTRACT_VERSION = 3
    127 _CD64_DISK_NUMBER = 4
    128 _CD64_DISK_NUMBER_START = 5
    129 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
    130 _CD64_NUMBER_ENTRIES_TOTAL = 7
    131 _CD64_DIRECTORY_SIZE = 8
    132 _CD64_OFFSET_START_CENTDIR = 9
    133 
    134 def _check_zipfile(fp):
    135     try:
    136         if _EndRecData(fp):
    137             return True         # file has correct magic number
    138     except IOError:
    139         pass
    140     return False
    141 
    142 def is_zipfile(filename):
    143     """Quickly see if a file is a ZIP file by checking the magic number.
    144 
    145     The filename argument may be a file or file-like object too.
    146     """
    147     result = False
    148     try:
    149         if hasattr(filename, "read"):
    150             result = _check_zipfile(fp=filename)
    151         else:
    152             with open(filename, "rb") as fp:
    153                 result = _check_zipfile(fp)
    154     except IOError:
    155         pass
    156     return result
    157 
    158 def _EndRecData64(fpin, offset, endrec):
    159     """
    160     Read the ZIP64 end-of-archive records and use that to update endrec
    161     """
    162     try:
    163         fpin.seek(offset - sizeEndCentDir64Locator, 2)
    164     except IOError:
    165         # If the seek fails, the file is not large enough to contain a ZIP64
    166         # end-of-archive record, so just return the end record we were given.
    167         return endrec
    168 
    169     data = fpin.read(sizeEndCentDir64Locator)
    170     if len(data) != sizeEndCentDir64Locator:
    171         return endrec
    172     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    173     if sig != stringEndArchive64Locator:
    174         return endrec
    175 
    176     if diskno != 0 or disks != 1:
    177         raise BadZipfile("zipfiles that span multiple disks are not supported")
    178 
    179     # Assume no 'zip64 extensible data'
    180     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    181     data = fpin.read(sizeEndCentDir64)
    182     if len(data) != sizeEndCentDir64:
    183         return endrec
    184     sig, sz, create_version, read_version, disk_num, disk_dir, \
    185             dircount, dircount2, dirsize, diroffset = \
    186             struct.unpack(structEndArchive64, data)
    187     if sig != stringEndArchive64:
    188         return endrec
    189 
    190     # Update the original endrec using data from the ZIP64 record
    191     endrec[_ECD_SIGNATURE] = sig
    192     endrec[_ECD_DISK_NUMBER] = disk_num
    193     endrec[_ECD_DISK_START] = disk_dir
    194     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    195     endrec[_ECD_ENTRIES_TOTAL] = dircount2
    196     endrec[_ECD_SIZE] = dirsize
    197     endrec[_ECD_OFFSET] = diroffset
    198     return endrec
    199 
    200 
    201 def _EndRecData(fpin):
    202     """Return data from the "End of Central Directory" record, or None.
    203 
    204     The data is a list of the nine items in the ZIP "End of central dir"
    205     record followed by a tenth item, the file seek offset of this record."""
    206 
    207     # Determine file size
    208     fpin.seek(0, 2)
    209     filesize = fpin.tell()
    210 
    211     # Check to see if this is ZIP file with no archive comment (the
    212     # "end of central directory" structure should be the last item in the
    213     # file if this is the case).
    214     try:
    215         fpin.seek(-sizeEndCentDir, 2)
    216     except IOError:
    217         return None
    218     data = fpin.read()
    219     if (len(data) == sizeEndCentDir and
    220         data[0:4] == stringEndArchive and
    221         data[-2:] == b"\000\000"):
    222         # the signature is correct and there's no comment, unpack structure
    223         endrec = struct.unpack(structEndArchive, data)
    224         endrec=list(endrec)
    225 
    226         # Append a blank comment and record start offset
    227         endrec.append("")
    228         endrec.append(filesize - sizeEndCentDir)
    229 
    230         # Try to read the "Zip64 end of central directory" structure
    231         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
    232 
    233     # Either this is not a ZIP file, or it is a ZIP file with an archive
    234     # comment.  Search the end of the file for the "end of central directory"
    235     # record signature. The comment is the last item in the ZIP file and may be
    236     # up to 64K long.  It is assumed that the "end of central directory" magic
    237     # number does not appear in the comment.
    238     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    239     fpin.seek(maxCommentStart, 0)
    240     data = fpin.read()
    241     start = data.rfind(stringEndArchive)
    242     if start >= 0:
    243         # found the magic number; attempt to unpack and interpret
    244         recData = data[start:start+sizeEndCentDir]
    245         if len(recData) != sizeEndCentDir:
    246             # Zip file is corrupted.
    247             return None
    248         endrec = list(struct.unpack(structEndArchive, recData))
    249         commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
    250         comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
    251         endrec.append(comment)
    252         endrec.append(maxCommentStart + start)
    253 
    254         # Try to read the "Zip64 end of central directory" structure
    255         return _EndRecData64(fpin, maxCommentStart + start - filesize,
    256                              endrec)
    257 
    258     # Unable to find a valid end of central directory structure
    259     return None
    260 
    261 
    262 class ZipInfo (object):
    263     """Class with attributes describing each file in the ZIP archive."""
    264 
    265     __slots__ = (
    266             'orig_filename',
    267             'filename',
    268             'date_time',
    269             'compress_type',
    270             'comment',
    271             'extra',
    272             'create_system',
    273             'create_version',
    274             'extract_version',
    275             'reserved',
    276             'flag_bits',
    277             'volume',
    278             'internal_attr',
    279             'external_attr',
    280             'header_offset',
    281             'CRC',
    282             'compress_size',
    283             'file_size',
    284             '_raw_time',
    285         )
    286 
    287     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    288         self.orig_filename = filename   # Original file name in archive
    289 
    290         # Terminate the file name at the first null byte.  Null bytes in file
    291         # names are used as tricks by viruses in archives.
    292         null_byte = filename.find(chr(0))
    293         if null_byte >= 0:
    294             filename = filename[0:null_byte]
    295         # This is used to ensure paths in generated ZIP files always use
    296         # forward slashes as the directory separator, as required by the
    297         # ZIP format specification.
    298         if os.sep != "/" and os.sep in filename:
    299             filename = filename.replace(os.sep, "/")
    300 
    301         self.filename = filename        # Normalized file name
    302         self.date_time = date_time      # year, month, day, hour, min, sec
    303 
    304         if date_time[0] < 1980:
    305             raise ValueError('ZIP does not support timestamps before 1980')
    306 
    307         # Standard values:
    308         self.compress_type = ZIP_STORED # Type of compression for the file
    309         self.comment = ""               # Comment for each file
    310         self.extra = ""                 # ZIP extra data
    311         if sys.platform == 'win32':
    312             self.create_system = 0          # System which created ZIP archive
    313         else:
    314             # Assume everything else is unix-y
    315             self.create_system = 3          # System which created ZIP archive
    316         self.create_version = 20        # Version which created ZIP archive
    317         self.extract_version = 20       # Version needed to extract archive
    318         self.reserved = 0               # Must be zero
    319         self.flag_bits = 0              # ZIP flag bits
    320         self.volume = 0                 # Volume number of file header
    321         self.internal_attr = 0          # Internal attributes
    322         self.external_attr = 0          # External file attributes
    323         # Other attributes are set by class ZipFile:
    324         # header_offset         Byte offset to the file header
    325         # CRC                   CRC-32 of the uncompressed file
    326         # compress_size         Size of the compressed file
    327         # file_size             Size of the uncompressed file
    328 
    329     def FileHeader(self, zip64=None):
    330         """Return the per-file header as a string."""
    331         dt = self.date_time
    332         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    333         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    334         if self.flag_bits & 0x08:
    335             # Set these to zero because we write them after the file data
    336             CRC = compress_size = file_size = 0
    337         else:
    338             CRC = self.CRC
    339             compress_size = self.compress_size
    340             file_size = self.file_size
    341 
    342         extra = self.extra
    343 
    344         if zip64 is None:
    345             zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
    346         if zip64:
    347             fmt = '<HHQQ'
    348             extra = extra + struct.pack(fmt,
    349                     1, struct.calcsize(fmt)-4, file_size, compress_size)
    350         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
    351             if not zip64:
    352                 raise LargeZipFile("Filesize would require ZIP64 extensions")
    353             # File is larger than what fits into a 4 byte integer,
    354             # fall back to the ZIP64 extension
    355             file_size = 0xffffffff
    356             compress_size = 0xffffffff
    357             self.extract_version = max(45, self.extract_version)
    358             self.create_version = max(45, self.extract_version)
    359 
    360         filename, flag_bits = self._encodeFilenameFlags()
    361         header = struct.pack(structFileHeader, stringFileHeader,
    362                  self.extract_version, self.reserved, flag_bits,
    363                  self.compress_type, dostime, dosdate, CRC,
    364                  compress_size, file_size,
    365                  len(filename), len(extra))
    366         return header + filename + extra
    367 
    368     def _encodeFilenameFlags(self):
    369         if isinstance(self.filename, unicode):
    370             try:
    371                 return self.filename.encode('ascii'), self.flag_bits
    372             except UnicodeEncodeError:
    373                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
    374         else:
    375             return self.filename, self.flag_bits
    376 
    377     def _decodeFilename(self):
    378         if self.flag_bits & 0x800:
    379             return self.filename.decode('utf-8')
    380         else:
    381             return self.filename
    382 
    383     def _decodeExtra(self):
    384         # Try to decode the extra field.
    385         extra = self.extra
    386         unpack = struct.unpack
    387         while extra:
    388             tp, ln = unpack('<HH', extra[:4])
    389             if tp == 1:
    390                 if ln >= 24:
    391                     counts = unpack('<QQQ', extra[4:28])
    392                 elif ln == 16:
    393                     counts = unpack('<QQ', extra[4:20])
    394                 elif ln == 8:
    395                     counts = unpack('<Q', extra[4:12])
    396                 elif ln == 0:
    397                     counts = ()
    398                 else:
    399                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
    400 
    401                 idx = 0
    402 
    403                 # ZIP64 extension (large files and/or large archives)
    404                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
    405                     self.file_size = counts[idx]
    406                     idx += 1
    407 
    408                 if self.compress_size == 0xFFFFFFFFL:
    409                     self.compress_size = counts[idx]
    410                     idx += 1
    411 
    412                 if self.header_offset == 0xffffffffL:
    413                     old = self.header_offset
    414                     self.header_offset = counts[idx]
    415                     idx+=1
    416 
    417             extra = extra[ln+4:]
    418 
    419 
    420 class _ZipDecrypter:
    421     """Class to handle decryption of files stored within a ZIP archive.
    422 
    423     ZIP supports a password-based form of encryption. Even though known
    424     plaintext attacks have been found against it, it is still useful
    425     to be able to get data out of such a file.
    426 
    427     Usage:
    428         zd = _ZipDecrypter(mypwd)
    429         plain_char = zd(cypher_char)
    430         plain_text = map(zd, cypher_text)
    431     """
    432 
    433     def _GenerateCRCTable():
    434         """Generate a CRC-32 table.
    435 
    436         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
    437         internal keys. We noticed that a direct implementation is faster than
    438         relying on binascii.crc32().
    439         """
    440         poly = 0xedb88320
    441         table = [0] * 256
    442         for i in range(256):
    443             crc = i
    444             for j in range(8):
    445                 if crc & 1:
    446                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
    447                 else:
    448                     crc = ((crc >> 1) & 0x7FFFFFFF)
    449             table[i] = crc
    450         return table
    451     crctable = _GenerateCRCTable()
    452 
    453     def _crc32(self, ch, crc):
    454         """Compute the CRC32 primitive on one byte."""
    455         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
    456 
    457     def __init__(self, pwd):
    458         self.key0 = 305419896
    459         self.key1 = 591751049
    460         self.key2 = 878082192
    461         for p in pwd:
    462             self._UpdateKeys(p)
    463 
    464     def _UpdateKeys(self, c):
    465         self.key0 = self._crc32(c, self.key0)
    466         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
    467         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
    468         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
    469 
    470     def __call__(self, c):
    471         """Decrypt a single character."""
    472         c = ord(c)
    473         k = self.key2 | 2
    474         c = c ^ (((k * (k^1)) >> 8) & 255)
    475         c = chr(c)
    476         self._UpdateKeys(c)
    477         return c
    478 
    479 
    480 compressor_names = {
    481     0: 'store',
    482     1: 'shrink',
    483     2: 'reduce',
    484     3: 'reduce',
    485     4: 'reduce',
    486     5: 'reduce',
    487     6: 'implode',
    488     7: 'tokenize',
    489     8: 'deflate',
    490     9: 'deflate64',
    491     10: 'implode',
    492     12: 'bzip2',
    493     14: 'lzma',
    494     18: 'terse',
    495     19: 'lz77',
    496     97: 'wavpack',
    497     98: 'ppmd',
    498 }
    499 
    500 
    501 class ZipExtFile(io.BufferedIOBase):
    502     """File-like object for reading an archive member.
    503        Is returned by ZipFile.open().
    504     """
    505 
    506     # Max size supported by decompressor.
    507     MAX_N = 1 << 31 - 1
    508 
    509     # Read from compressed files in 4k blocks.
    510     MIN_READ_SIZE = 4096
    511 
    512     # Search for universal newlines or line chunks.
    513     PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
    514 
    515     def __init__(self, fileobj, mode, zipinfo, decrypter=None,
    516             close_fileobj=False):
    517         self._fileobj = fileobj
    518         self._decrypter = decrypter
    519         self._close_fileobj = close_fileobj
    520 
    521         self._compress_type = zipinfo.compress_type
    522         self._compress_size = zipinfo.compress_size
    523         self._compress_left = zipinfo.compress_size
    524 
    525         if self._compress_type == ZIP_DEFLATED:
    526             self._decompressor = zlib.decompressobj(-15)
    527         elif self._compress_type != ZIP_STORED:
    528             descr = compressor_names.get(self._compress_type)
    529             if descr:
    530                 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
    531             else:
    532                 raise NotImplementedError("compression type %d" % (self._compress_type,))
    533         self._unconsumed = ''
    534 
    535         self._readbuffer = ''
    536         self._offset = 0
    537 
    538         self._universal = 'U' in mode
    539         self.newlines = None
    540 
    541         # Adjust read size for encrypted files since the first 12 bytes
    542         # are for the encryption/password information.
    543         if self._decrypter is not None:
    544             self._compress_left -= 12
    545 
    546         self.mode = mode
    547         self.name = zipinfo.filename
    548 
    549         if hasattr(zipinfo, 'CRC'):
    550             self._expected_crc = zipinfo.CRC
    551             self._running_crc = crc32(b'') & 0xffffffff
    552         else:
    553             self._expected_crc = None
    554 
    555     def readline(self, limit=-1):
    556         """Read and return a line from the stream.
    557 
    558         If limit is specified, at most limit bytes will be read.
    559         """
    560 
    561         if not self._universal and limit < 0:
    562             # Shortcut common case - newline found in buffer.
    563             i = self._readbuffer.find('\n', self._offset) + 1
    564             if i > 0:
    565                 line = self._readbuffer[self._offset: i]
    566                 self._offset = i
    567                 return line
    568 
    569         if not self._universal:
    570             return io.BufferedIOBase.readline(self, limit)
    571 
    572         line = ''
    573         while limit < 0 or len(line) < limit:
    574             readahead = self.peek(2)
    575             if readahead == '':
    576                 return line
    577 
    578             #
    579             # Search for universal newlines or line chunks.
    580             #
    581             # The pattern returns either a line chunk or a newline, but not
    582             # both. Combined with peek(2), we are assured that the sequence
    583             # '\r\n' is always retrieved completely and never split into
    584             # separate newlines - '\r', '\n' due to coincidental readaheads.
    585             #
    586             match = self.PATTERN.search(readahead)
    587             newline = match.group('newline')
    588             if newline is not None:
    589                 if self.newlines is None:
    590                     self.newlines = []
    591                 if newline not in self.newlines:
    592                     self.newlines.append(newline)
    593                 self._offset += len(newline)
    594                 return line + '\n'
    595 
    596             chunk = match.group('chunk')
    597             if limit >= 0:
    598                 chunk = chunk[: limit - len(line)]
    599 
    600             self._offset += len(chunk)
    601             line += chunk
    602 
    603         return line
    604 
    605     def peek(self, n=1):
    606         """Returns buffered bytes without advancing the position."""
    607         if n > len(self._readbuffer) - self._offset:
    608             chunk = self.read(n)
    609             self._offset -= len(chunk)
    610 
    611         # Return up to 512 bytes to reduce allocation overhead for tight loops.
    612         return self._readbuffer[self._offset: self._offset + 512]
    613 
    614     def readable(self):
    615         return True
    616 
    617     def read(self, n=-1):
    618         """Read and return up to n bytes.
    619         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
    620         """
    621         buf = ''
    622         if n is None:
    623             n = -1
    624         while True:
    625             if n < 0:
    626                 data = self.read1(n)
    627             elif n > len(buf):
    628                 data = self.read1(n - len(buf))
    629             else:
    630                 return buf
    631             if len(data) == 0:
    632                 return buf
    633             buf += data
    634 
    635     def _update_crc(self, newdata, eof):
    636         # Update the CRC using the given data.
    637         if self._expected_crc is None:
    638             # No need to compute the CRC if we don't have a reference value
    639             return
    640         self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
    641         # Check the CRC if we're at the end of the file
    642         if eof and self._running_crc != self._expected_crc:
    643             raise BadZipfile("Bad CRC-32 for file %r" % self.name)
    644 
    645     def read1(self, n):
    646         """Read up to n bytes with at most one read() system call."""
    647 
    648         # Simplify algorithm (branching) by transforming negative n to large n.
    649         if n < 0 or n is None:
    650             n = self.MAX_N
    651 
    652         # Bytes available in read buffer.
    653         len_readbuffer = len(self._readbuffer) - self._offset
    654 
    655         # Read from file.
    656         if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
    657             nbytes = n - len_readbuffer - len(self._unconsumed)
    658             nbytes = max(nbytes, self.MIN_READ_SIZE)
    659             nbytes = min(nbytes, self._compress_left)
    660 
    661             data = self._fileobj.read(nbytes)
    662             self._compress_left -= len(data)
    663 
    664             if data and self._decrypter is not None:
    665                 data = ''.join(map(self._decrypter, data))
    666 
    667             if self._compress_type == ZIP_STORED:
    668                 self._update_crc(data, eof=(self._compress_left==0))
    669                 self._readbuffer = self._readbuffer[self._offset:] + data
    670                 self._offset = 0
    671             else:
    672                 # Prepare deflated bytes for decompression.
    673                 self._unconsumed += data
    674 
    675         # Handle unconsumed data.
    676         if (len(self._unconsumed) > 0 and n > len_readbuffer and
    677             self._compress_type == ZIP_DEFLATED):
    678             data = self._decompressor.decompress(
    679                 self._unconsumed,
    680                 max(n - len_readbuffer, self.MIN_READ_SIZE)
    681             )
    682 
    683             self._unconsumed = self._decompressor.unconsumed_tail
    684             eof = len(self._unconsumed) == 0 and self._compress_left == 0
    685             if eof:
    686                 data += self._decompressor.flush()
    687 
    688             self._update_crc(data, eof=eof)
    689             self._readbuffer = self._readbuffer[self._offset:] + data
    690             self._offset = 0
    691 
    692         # Read from buffer.
    693         data = self._readbuffer[self._offset: self._offset + n]
    694         self._offset += len(data)
    695         return data
    696 
    697     def close(self):
    698         try :
    699             if self._close_fileobj:
    700                 self._fileobj.close()
    701         finally:
    702             super(ZipExtFile, self).close()
    703 
    704 
    705 class ZipFile(object):
    706     """ Class with methods to open, read, write, close, list zip files.
    707 
    708     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
    709 
    710     file: Either the path to the file, or a file-like object.
    711           If it is a path, the file will be opened and closed by ZipFile.
    712     mode: The mode can be either read "r", write "w" or append "a".
    713     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    714     allowZip64: if True ZipFile will create files with ZIP64 extensions when
    715                 needed, otherwise it will raise an exception when this would
    716                 be necessary.
    717 
    718     """
    719 
    720     fp = None                   # Set here since __del__ checks it
    721 
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        file        -- path of the archive, or an already open file-like
                       object.  A path is opened here; a file-like object
                       is used as given.
        mode        -- "r", "w" or "a".  Appending to a path that cannot be
                       opened for update falls back to creating the file.
        compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib).
        allowZip64  -- stored for later use when ZIP64 extensions would be
                       required.

        Raises RuntimeError for an invalid mode or compression method.  If
        reading the archive contents fails, a file opened here is closed
        again before the exception propagates.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        elif compression != ZIP_STORED:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        # Check if we were passed a file-like object
        if not isinstance(file, basestring):
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        else:
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode != 'a':
                    raise
                # The file could not be opened for update: create it instead.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Deliberately catches everything: drop the file reference,
            # close the file only if it was opened here, then re-raise.
            opened = self.fp
            self.fp = None
            if not self._filePassed:
                opened.close()
            raise
    792 
    793     def __enter__(self):
    794         return self
    795 
    796     def __exit__(self, type, value, traceback):
    797         self.close()
    798 
    799     def _RealGetContents(self):
    800         """Read in the table of contents for the ZIP file."""
    801         fp = self.fp
    802         try:
    803             endrec = _EndRecData(fp)
    804         except IOError:
    805             raise BadZipfile("File is not a zip file")
    806         if not endrec:
    807             raise BadZipfile, "File is not a zip file"
    808         if self.debug > 1:
    809             print endrec
    810         size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    811         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    812         self._comment = endrec[_ECD_COMMENT]    # archive comment
    813 
    814         # "concat" is zero, unless zip was concatenated to another file
    815         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    816         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
    817             # If Zip64 extension structures are present, account for them
    818             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    819 
    820         if self.debug > 2:
    821             inferred = concat + offset_cd
    822             print "given, inferred, offset", offset_cd, inferred, concat
    823         # self.start_dir:  Position of start of central directory
    824         self.start_dir = offset_cd + concat
    825         fp.seek(self.start_dir, 0)
    826         data = fp.read(size_cd)
    827         fp = cStringIO.StringIO(data)
    828         total = 0
    829         while total < size_cd:
    830             centdir = fp.read(sizeCentralDir)
    831             if len(centdir) != sizeCentralDir:
    832                 raise BadZipfile("Truncated central directory")
    833             centdir = struct.unpack(structCentralDir, centdir)
    834             if centdir[_CD_SIGNATURE] != stringCentralDir:
    835                 raise BadZipfile("Bad magic number for central directory")
    836             if self.debug > 2:
    837                 print centdir
    838             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
    839             # Create ZipInfo instance to store file information
    840             x = ZipInfo(filename)
    841             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
    842             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
    843             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
    844             (x.create_version, x.create_system, x.extract_version, x.reserved,
    845                 x.flag_bits, x.compress_type, t, d,
    846                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
    847             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
    848             # Convert date/time code to (year, month, day, hour, min, sec)
    849             x._raw_time = t
    850             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
    851                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
    852 
    853             x._decodeExtra()
    854             x.header_offset = x.header_offset + concat
    855             x.filename = x._decodeFilename()
    856             self.filelist.append(x)
    857             self.NameToInfo[x.filename] = x
    858 
    859             # update total bytes read from central directory
    860             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
    861                      + centdir[_CD_EXTRA_FIELD_LENGTH]
    862                      + centdir[_CD_COMMENT_LENGTH])
    863 
    864             if self.debug > 2:
    865                 print "total", total
    866 
    867 
    868     def namelist(self):
    869         """Return a list of file names in the archive."""
    870         l = []
    871         for data in self.filelist:
    872             l.append(data.filename)
    873         return l
    874 
    875     def infolist(self):
    876         """Return a list of class ZipInfo instances for files in the
    877         archive."""
    878         return self.filelist
    879 
    880     def printdir(self):
    881         """Print a table of contents for the zip file."""
    882         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
    883         for zinfo in self.filelist:
    884             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
    885             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
    886 
    887     def testzip(self):
    888         """Read all the files and check the CRC."""
    889         chunk_size = 2 ** 20
    890         for zinfo in self.filelist:
    891             try:
    892                 # Read by chunks, to avoid an OverflowError or a
    893                 # MemoryError with very large embedded files.
    894                 with self.open(zinfo.filename, "r") as f:
    895                     while f.read(chunk_size):     # Check CRC-32
    896                         pass
    897             except BadZipfile:
    898                 return zinfo.filename
    899 
    900     def getinfo(self, name):
    901         """Return the instance of ZipInfo given 'name'."""
    902         info = self.NameToInfo.get(name)
    903         if info is None:
    904             raise KeyError(
    905                 'There is no item named %r in the archive' % name)
    906 
    907         return info
    908 
    909     def setpassword(self, pwd):
    910         """Set default password for encrypted files."""
    911         self.pwd = pwd
    912 
    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        """Set the archive comment and mark the archive as modified.

        Comments longer than ZIP_MAX_COMMENT bytes are truncated; a notice
        is printed when self.debug is set.
        """
        # The end-of-archive record stores the comment length in a 16-bit
        # field, so a comment of exactly ZIP_MAX_COMMENT bytes still fits;
        # only strictly longer comments need truncating.
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                        % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True
    928 
    929     def read(self, name, pwd=None):
    930         """Return file bytes (as a string) for name."""
    931         return self.open(name, "r", pwd).read()
    932 
    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        `name' is a member name or a ZipInfo instance, `mode' must be "r",
        "U" or "rU", and `pwd' overrides the default password for an
        encrypted member.  Raises RuntimeError for a bad mode, a closed
        archive or a missing or wrong password, and BadZipfile when the
        local file header is truncated, has a bad signature or names a
        different file than the central directory.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        should_close = not self._filePassed
        if should_close:
            zef_file = open(self.filename, 'rb')
        else:
            zef_file = self.fp

        try:
            # Make sure we have an info object
            zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Read and validate the fixed-size local file header.
            raw_header = zef_file.read(sizeFileHeader)
            if len(raw_header) != sizeFileHeader:
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, raw_header)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            # The name is kept for checking; the extra field is skipped.
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            extra_len = fheader[_FH_EXTRA_FIELD_LENGTH]
            if extra_len:
                zef_file.read(extra_len)

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                        'File name in directory "%s" and header "%s" differ.' % (
                            zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            zd = None
            if zinfo.flag_bits & 0x1:
                pwd = pwd or self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                        "password required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                enc_header = zef_file.read(12)
                decrypted = map(zd, enc_header[0:12])
                if zinfo.flag_bits & 0x8:
                    # compare against the file time from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(decrypted[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd,
                    close_fileobj=should_close)
        except:
            # Do not leak the private handle when setup fails.
            if should_close:
                zef_file.close()
            raise
   1011 
   1012     def extract(self, member, path=None, pwd=None):
   1013         """Extract a member from the archive to the current working directory,
   1014            using its full name. Its file information is extracted as accurately
   1015            as possible. `member' may be a filename or a ZipInfo object. You can
   1016            specify a different directory using `path'.
   1017         """
   1018         if not isinstance(member, ZipInfo):
   1019             member = self.getinfo(member)
   1020 
   1021         if path is None:
   1022             path = os.getcwd()
   1023 
   1024         return self._extract_member(member, path, pwd)
   1025 
   1026     def extractall(self, path=None, members=None, pwd=None):
   1027         """Extract all members from the archive to the current working
   1028            directory. `path' specifies a different directory to extract to.
   1029            `members' is optional and must be a subset of the list returned
   1030            by namelist().
   1031         """
   1032         if members is None:
   1033             members = self.namelist()
   1034 
   1035         for zipinfo in members:
   1036             self.extract(zipinfo, path, pwd)
   1037 
   1038     def _extract_member(self, member, targetpath, pwd):
   1039         """Extract the ZipInfo object 'member' to a physical
   1040            file on the path targetpath.
   1041         """
   1042         # build the destination pathname, replacing
   1043         # forward slashes to platform specific separators.
   1044         arcname = member.filename.replace('/', os.path.sep)
   1045 
   1046         if os.path.altsep:
   1047             arcname = arcname.replace(os.path.altsep, os.path.sep)
   1048         # interpret absolute pathname as relative, remove drive letter or
   1049         # UNC path, redundant separators, "." and ".." components.
   1050         arcname = os.path.splitdrive(arcname)[1]
   1051         arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
   1052                     if x not in ('', os.path.curdir, os.path.pardir))
   1053         if os.path.sep == '\\':
   1054             # filter illegal characters on Windows
   1055             illegal = ':<>|"?*'
   1056             if isinstance(arcname, unicode):
   1057                 table = {ord(c): ord('_') for c in illegal}
   1058             else:
   1059                 table = string.maketrans(illegal, '_' * len(illegal))
   1060             arcname = arcname.translate(table)
   1061             # remove trailing dots
   1062             arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
   1063             arcname = os.path.sep.join(x for x in arcname if x)
   1064 
   1065         targetpath = os.path.join(targetpath, arcname)
   1066         targetpath = os.path.normpath(targetpath)
   1067 
   1068         # Create all upper directories if necessary.
   1069         upperdirs = os.path.dirname(targetpath)
   1070         if upperdirs and not os.path.exists(upperdirs):
   1071             os.makedirs(upperdirs)
   1072 
   1073         if member.filename[-1] == '/':
   1074             if not os.path.isdir(targetpath):
   1075                 os.mkdir(targetpath)
   1076             return targetpath
   1077 
   1078         with self.open(member, pwd=pwd) as source, \
   1079              file(targetpath, "wb") as target:
   1080             shutil.copyfileobj(source, target)
   1081 
   1082         return targetpath
   1083 
   1084     def _writecheck(self, zinfo):
   1085         """Check for errors before writing a file to the archive."""
   1086         if zinfo.filename in self.NameToInfo:
   1087             if self.debug:      # Warning for duplicate names
   1088                 print "Duplicate name:", zinfo.filename
   1089         if self.mode not in ("w", "a"):
   1090             raise RuntimeError, 'write() requires mode "w" or "a"'
   1091         if not self.fp:
   1092             raise RuntimeError, \
   1093                   "Attempt to write ZIP archive that was already closed"
   1094         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
   1095             raise RuntimeError, \
   1096                   "Compression requires the (missing) zlib module"
   1097         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
   1098             raise RuntimeError, \
   1099                   "That compression method is not supported"
   1100         if zinfo.file_size > ZIP64_LIMIT:
   1101             if not self._allowZip64:
   1102                 raise LargeZipFile("Filesize would require ZIP64 extensions")
   1103         if zinfo.header_offset > ZIP64_LIMIT:
   1104             if not self._allowZip64:
   1105                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
   1106 
   1107     def write(self, filename, arcname=None, compress_type=None):
   1108         """Put the bytes from filename into the archive under the name
   1109         arcname."""
   1110         if not self.fp:
   1111             raise RuntimeError(
   1112                   "Attempt to write to ZIP archive that was already closed")
   1113 
   1114         st = os.stat(filename)
   1115         isdir = stat.S_ISDIR(st.st_mode)
   1116         mtime = time.localtime(st.st_mtime)
   1117         date_time = mtime[0:6]
   1118         # Create ZipInfo instance to store file information
   1119         if arcname is None:
   1120             arcname = filename
   1121         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
   1122         while arcname[0] in (os.sep, os.altsep):
   1123             arcname = arcname[1:]
   1124         if isdir:
   1125             arcname += '/'
   1126         zinfo = ZipInfo(arcname, date_time)
   1127         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
   1128         if compress_type is None:
   1129             zinfo.compress_type = self.compression
   1130         else:
   1131             zinfo.compress_type = compress_type
   1132 
   1133         zinfo.file_size = st.st_size
   1134         zinfo.flag_bits = 0x00
   1135         zinfo.header_offset = self.fp.tell()    # Start of header bytes
   1136 
   1137         self._writecheck(zinfo)
   1138         self._didModify = True
   1139 
   1140         if isdir:
   1141             zinfo.file_size = 0
   1142             zinfo.compress_size = 0
   1143             zinfo.CRC = 0
   1144             self.filelist.append(zinfo)
   1145             self.NameToInfo[zinfo.filename] = zinfo
   1146             self.fp.write(zinfo.FileHeader(False))
   1147             return
   1148 
   1149         with open(filename, "rb") as fp:
   1150             # Must overwrite CRC and sizes with correct data later
   1151             zinfo.CRC = CRC = 0
   1152             zinfo.compress_size = compress_size = 0
   1153             # Compressed size can be larger than uncompressed size
   1154             zip64 = self._allowZip64 and \
   1155                     zinfo.file_size * 1.05 > ZIP64_LIMIT
   1156             self.fp.write(zinfo.FileHeader(zip64))
   1157             if zinfo.compress_type == ZIP_DEFLATED:
   1158                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1159                      zlib.DEFLATED, -15)
   1160             else:
   1161                 cmpr = None
   1162             file_size = 0
   1163             while 1:
   1164                 buf = fp.read(1024 * 8)
   1165                 if not buf:
   1166                     break
   1167                 file_size = file_size + len(buf)
   1168                 CRC = crc32(buf, CRC) & 0xffffffff
   1169                 if cmpr:
   1170                     buf = cmpr.compress(buf)
   1171                     compress_size = compress_size + len(buf)
   1172                 self.fp.write(buf)
   1173         if cmpr:
   1174             buf = cmpr.flush()
   1175             compress_size = compress_size + len(buf)
   1176             self.fp.write(buf)
   1177             zinfo.compress_size = compress_size
   1178         else:
   1179             zinfo.compress_size = file_size
   1180         zinfo.CRC = CRC
   1181         zinfo.file_size = file_size
   1182         if not zip64 and self._allowZip64:
   1183             if file_size > ZIP64_LIMIT:
   1184                 raise RuntimeError('File size has increased during compressing')
   1185             if compress_size > ZIP64_LIMIT:
   1186                 raise RuntimeError('Compressed size larger than uncompressed size')
   1187         # Seek backwards and write file header (which will now include
   1188         # correct CRC and file sizes)
   1189         position = self.fp.tell()       # Preserve current position in file
   1190         self.fp.seek(zinfo.header_offset, 0)
   1191         self.fp.write(zinfo.FileHeader(zip64))
   1192         self.fp.seek(position, 0)
   1193         self.filelist.append(zinfo)
   1194         self.NameToInfo[zinfo.filename] = zinfo
   1195 
   1196     def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
   1197         """Write a file into the archive.  The contents is the string
   1198         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
   1199         the name of the file in the archive."""
   1200         if not isinstance(zinfo_or_arcname, ZipInfo):
   1201             zinfo = ZipInfo(filename=zinfo_or_arcname,
   1202                             date_time=time.localtime(time.time())[:6])
   1203 
   1204             zinfo.compress_type = self.compression
   1205             zinfo.external_attr = 0600 << 16
   1206         else:
   1207             zinfo = zinfo_or_arcname
   1208 
   1209         if not self.fp:
   1210             raise RuntimeError(
   1211                   "Attempt to write to ZIP archive that was already closed")
   1212 
   1213         if compress_type is not None:
   1214             zinfo.compress_type = compress_type
   1215 
   1216         zinfo.file_size = len(bytes)            # Uncompressed size
   1217         zinfo.header_offset = self.fp.tell()    # Start of header bytes
   1218         self._writecheck(zinfo)
   1219         self._didModify = True
   1220         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
   1221         if zinfo.compress_type == ZIP_DEFLATED:
   1222             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1223                  zlib.DEFLATED, -15)
   1224             bytes = co.compress(bytes) + co.flush()
   1225             zinfo.compress_size = len(bytes)    # Compressed size
   1226         else:
   1227             zinfo.compress_size = zinfo.file_size
   1228         zip64 = zinfo.file_size > ZIP64_LIMIT or \
   1229                 zinfo.compress_size > ZIP64_LIMIT
   1230         if zip64 and not self._allowZip64:
   1231             raise LargeZipFile("Filesize would require ZIP64 extensions")
   1232         self.fp.write(zinfo.FileHeader(zip64))
   1233         self.fp.write(bytes)
   1234         if zinfo.flag_bits & 0x08:
   1235             # Write CRC and file sizes after the file data
   1236             fmt = '<LQQ' if zip64 else '<LLL'
   1237             self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
   1238                   zinfo.file_size))
   1239         self.fp.flush()
   1240         self.filelist.append(zinfo)
   1241         self.NameToInfo[zinfo.filename] = zinfo
   1242 
   1243     def __del__(self):
   1244         """Call the "close()" method in case the user forgot."""
   1245         self.close()
   1246 
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        When the archive was opened for writing or appending and has been
        modified, the central directory and the end-of-archive record(s)
        are written at the current file position first.  The underlying
        file is closed only if it was opened by this object; self.fp is
        reset to None in every case, so calling close() again does nothing.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()           # start of the central directory
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    # Pack the timestamp into 16-bit DOS date and time
                    # words; seconds are stored halved.
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for their 32-bit fields are collected
                    # here and 0xffffffff is stored in the field instead.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffffL
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Prepend a ZIP64 field (header id 1, followed by
                        # the 64-bit values) to the member's extra data
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # Entries carrying a ZIP64 field advertise at least
                        # version 45.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Dump the values being packed to stderr to help
                        # diagnose the failure, then re-raise.  Note this
                        # dump uses zinfo.flag_bits and len(zinfo.filename)
                        # rather than the encoded values.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # Fixed-size record first, then its variable-length
                    # parts: file name, extra data, comment.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()           # end of the central directory
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator records pos2, which is where the ZIP64
                    # end record was just written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp the values so they fit the fields of the
                    # classic end-of-archive record written below.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Drop our reference even if writing failed, and close the file
            # only when this object opened it.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
   1351 
   1352 
   1353 class PyZipFile(ZipFile):
   1354     """Class to create ZIP archives with Python library files and packages."""
   1355 
   1356     def writepy(self, pathname, basename = ""):
   1357         """Add all files from "pathname" to the ZIP archive.
   1358 
   1359         If pathname is a package directory, search the directory and
   1360         all package subdirectories recursively for all *.py and enter
   1361         the modules into the archive.  If pathname is a plain
   1362         directory, listdir *.py and enter all modules.  Else, pathname
   1363         must be a Python *.py file and the module will be put into the
   1364         archive.  Added modules are always module.pyo or module.pyc.
   1365         This method will compile the module.py into module.pyc if
   1366         necessary.
   1367         """
   1368         dir, name = os.path.split(pathname)
   1369         if os.path.isdir(pathname):
   1370             initname = os.path.join(pathname, "__init__.py")
   1371             if os.path.isfile(initname):
   1372                 # This is a package directory, add it
   1373                 if basename:
   1374                     basename = "%s/%s" % (basename, name)
   1375                 else:
   1376                     basename = name
   1377                 if self.debug:
   1378                     print "Adding package in", pathname, "as", basename
   1379                 fname, arcname = self._get_codename(initname[0:-3], basename)
   1380                 if self.debug:
   1381                     print "Adding", arcname
   1382                 self.write(fname, arcname)
   1383                 dirlist = os.listdir(pathname)
   1384                 dirlist.remove("__init__.py")
   1385                 # Add all *.py files and package subdirectories
   1386                 for filename in dirlist:
   1387                     path = os.path.join(pathname, filename)
   1388                     root, ext = os.path.splitext(filename)
   1389                     if os.path.isdir(path):
   1390                         if os.path.isfile(os.path.join(path, "__init__.py")):
   1391                             # This is a package directory, add it
   1392                             self.writepy(path, basename)  # Recursive call
   1393                     elif ext == ".py":
   1394                         fname, arcname = self._get_codename(path[0:-3],
   1395                                          basename)
   1396                         if self.debug:
   1397                             print "Adding", arcname
   1398                         self.write(fname, arcname)
   1399             else:
   1400                 # This is NOT a package directory, add its files at top level
   1401                 if self.debug:
   1402                     print "Adding files from directory", pathname
   1403                 for filename in os.listdir(pathname):
   1404                     path = os.path.join(pathname, filename)
   1405                     root, ext = os.path.splitext(filename)
   1406                     if ext == ".py":
   1407                         fname, arcname = self._get_codename(path[0:-3],
   1408                                          basename)
   1409                         if self.debug:
   1410                             print "Adding", arcname
   1411                         self.write(fname, arcname)
   1412         else:
   1413             if pathname[-3:] != ".py":
   1414                 raise RuntimeError, \
   1415                       'Files added with writepy() must end with ".py"'
   1416             fname, arcname = self._get_codename(pathname[0:-3], basename)
   1417             if self.debug:
   1418                 print "Adding file", arcname
   1419             self.write(fname, arcname)
   1420 
   1421     def _get_codename(self, pathname, basename):
   1422         """Return (filename, archivename) for the path.
   1423 
   1424         Given a module name path, return the correct file path and
   1425         archive name, compiling if necessary.  For example, given
   1426         /python/lib/string, return (/python/lib/string.pyc, string).
   1427         """
   1428         file_py  = pathname + ".py"
   1429         file_pyc = pathname + ".pyc"
   1430         file_pyo = pathname + ".pyo"
   1431         if os.path.isfile(file_pyo) and \
   1432                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
   1433             fname = file_pyo    # Use .pyo file
   1434         elif not os.path.isfile(file_pyc) or \
   1435              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
   1436             import py_compile
   1437             if self.debug:
   1438                 print "Compiling", file_py
   1439             try:
   1440                 py_compile.compile(file_py, file_pyc, None, True)
   1441             except py_compile.PyCompileError,err:
   1442                 print err.msg
   1443             fname = file_pyc
   1444         else:
   1445             fname = file_pyc
   1446         archivename = os.path.split(fname)[1]
   1447         if basename:
   1448             archivename = "%s/%s" % (basename, archivename)
   1449         return (fname, archivename)
   1450 
   1451 
   1452 def main(args = None):
   1453     import textwrap
   1454     USAGE=textwrap.dedent("""\
   1455         Usage:
   1456             zipfile.py -l zipfile.zip        # Show listing of a zipfile
   1457             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
   1458             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
   1459             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
   1460         """)
   1461     if args is None:
   1462         args = sys.argv[1:]
   1463 
   1464     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
   1465         print USAGE
   1466         sys.exit(1)
   1467 
   1468     if args[0] == '-l':
   1469         if len(args) != 2:
   1470             print USAGE
   1471             sys.exit(1)
   1472         with ZipFile(args[1], 'r') as zf:
   1473             zf.printdir()
   1474 
   1475     elif args[0] == '-t':
   1476         if len(args) != 2:
   1477             print USAGE
   1478             sys.exit(1)
   1479         with ZipFile(args[1], 'r') as zf:
   1480             badfile = zf.testzip()
   1481         if badfile:
   1482             print("The following enclosed file is corrupted: {!r}".format(badfile))
   1483         print "Done testing"
   1484 
   1485     elif args[0] == '-e':
   1486         if len(args) != 3:
   1487             print USAGE
   1488             sys.exit(1)
   1489 
   1490         with ZipFile(args[1], 'r') as zf:
   1491             out = args[2]
   1492             for path in zf.namelist():
   1493                 if path.startswith('./'):
   1494                     tgt = os.path.join(out, path[2:])
   1495                 else:
   1496                     tgt = os.path.join(out, path)
   1497 
   1498                 tgtdir = os.path.dirname(tgt)
   1499                 if not os.path.exists(tgtdir):
   1500                     os.makedirs(tgtdir)
   1501                 with open(tgt, 'wb') as fp:
   1502                     fp.write(zf.read(path))
   1503 
   1504     elif args[0] == '-c':
   1505         if len(args) < 3:
   1506             print USAGE
   1507             sys.exit(1)
   1508 
   1509         def addToZip(zf, path, zippath):
   1510             if os.path.isfile(path):
   1511                 zf.write(path, zippath, ZIP_DEFLATED)
   1512             elif os.path.isdir(path):
   1513                 for nm in os.listdir(path):
   1514                     addToZip(zf,
   1515                             os.path.join(path, nm), os.path.join(zippath, nm))
   1516             # else: ignore
   1517 
   1518         with ZipFile(args[1], 'w', allowZip64=True) as zf:
   1519             for src in args[2:]:
   1520                 addToZip(zf, src, os.path.basename(src))
   1521 
# Run the command-line interface when this file is executed as a script;
# main() reads its arguments from sys.argv when called without any.
if __name__ == "__main__":
    main()
   1524