Home | History | Annotate | Download | only in Lib
      1 """
      2 Read and write ZIP files.
      3 """
      4 import struct, os, time, sys, shutil
      5 import binascii, cStringIO, stat
      6 import io
      7 import re
      8 import string
      9 
     10 try:
     11     import zlib # We may need its compression method
     12     crc32 = zlib.crc32
     13 except ImportError:
     14     zlib = None
     15     crc32 = binascii.crc32
     16 
     17 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
     18            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
     19 
     20 class BadZipfile(Exception):
     21     pass
     22 
     23 
     24 class LargeZipFile(Exception):
     25     """
     26     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
     27     and those extensions are disabled.
     28     """
     29 
     30 error = BadZipfile      # The exception raised by this module
     31 
     32 ZIP64_LIMIT = (1 << 31) - 1
     33 ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
     34 ZIP_MAX_COMMENT = (1 << 16) - 1
     35 
     36 # constants for Zip file compression methods
     37 ZIP_STORED = 0
     38 ZIP_DEFLATED = 8
     39 # Other ZIP compression methods not supported
     40 
     41 # Below are some formats and associated data for reading/writing headers using
     42 # the struct module.  The names and structures of headers/records are those used
     43 # in the PKWARE description of the ZIP file format:
     44 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
     45 # (URL valid as of January 2008)
     46 
     47 # The "end of central directory" structure, magic number, size, and indices
     48 # (section V.I in the format document)
     49 structEndArchive = "<4s4H2LH"
     50 stringEndArchive = "PK\005\006"
     51 sizeEndCentDir = struct.calcsize(structEndArchive)
     52 
     53 _ECD_SIGNATURE = 0
     54 _ECD_DISK_NUMBER = 1
     55 _ECD_DISK_START = 2
     56 _ECD_ENTRIES_THIS_DISK = 3
     57 _ECD_ENTRIES_TOTAL = 4
     58 _ECD_SIZE = 5
     59 _ECD_OFFSET = 6
     60 _ECD_COMMENT_SIZE = 7
     61 # These last two indices are not part of the structure as defined in the
     62 # spec, but they are used internally by this module as a convenience
     63 _ECD_COMMENT = 8
     64 _ECD_LOCATION = 9
     65 
     66 # The "central directory" structure, magic number, size, and indices
     67 # of entries in the structure (section V.F in the format document)
     68 structCentralDir = "<4s4B4HL2L5H2L"
     69 stringCentralDir = "PK\001\002"
     70 sizeCentralDir = struct.calcsize(structCentralDir)
     71 
     72 # indexes of entries in the central directory structure
     73 _CD_SIGNATURE = 0
     74 _CD_CREATE_VERSION = 1
     75 _CD_CREATE_SYSTEM = 2
     76 _CD_EXTRACT_VERSION = 3
     77 _CD_EXTRACT_SYSTEM = 4
     78 _CD_FLAG_BITS = 5
     79 _CD_COMPRESS_TYPE = 6
     80 _CD_TIME = 7
     81 _CD_DATE = 8
     82 _CD_CRC = 9
     83 _CD_COMPRESSED_SIZE = 10
     84 _CD_UNCOMPRESSED_SIZE = 11
     85 _CD_FILENAME_LENGTH = 12
     86 _CD_EXTRA_FIELD_LENGTH = 13
     87 _CD_COMMENT_LENGTH = 14
     88 _CD_DISK_NUMBER_START = 15
     89 _CD_INTERNAL_FILE_ATTRIBUTES = 16
     90 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
     91 _CD_LOCAL_HEADER_OFFSET = 18
     92 
     93 # The "local file header" structure, magic number, size, and indices
     94 # (section V.A in the format document)
     95 structFileHeader = "<4s2B4HL2L2H"
     96 stringFileHeader = "PK\003\004"
     97 sizeFileHeader = struct.calcsize(structFileHeader)
     98 
     99 _FH_SIGNATURE = 0
    100 _FH_EXTRACT_VERSION = 1
    101 _FH_EXTRACT_SYSTEM = 2
    102 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
    103 _FH_COMPRESSION_METHOD = 4
    104 _FH_LAST_MOD_TIME = 5
    105 _FH_LAST_MOD_DATE = 6
    106 _FH_CRC = 7
    107 _FH_COMPRESSED_SIZE = 8
    108 _FH_UNCOMPRESSED_SIZE = 9
    109 _FH_FILENAME_LENGTH = 10
    110 _FH_EXTRA_FIELD_LENGTH = 11
    111 
    112 # The "Zip64 end of central directory locator" structure, magic number, and size
    113 structEndArchive64Locator = "<4sLQL"
    114 stringEndArchive64Locator = "PK\x06\x07"
    115 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
    116 
    117 # The "Zip64 end of central directory" record, magic number, size, and indices
    118 # (section V.G in the format document)
    119 structEndArchive64 = "<4sQ2H2L4Q"
    120 stringEndArchive64 = "PK\x06\x06"
    121 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
    122 
    123 _CD64_SIGNATURE = 0
    124 _CD64_DIRECTORY_RECSIZE = 1
    125 _CD64_CREATE_VERSION = 2
    126 _CD64_EXTRACT_VERSION = 3
    127 _CD64_DISK_NUMBER = 4
    128 _CD64_DISK_NUMBER_START = 5
    129 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
    130 _CD64_NUMBER_ENTRIES_TOTAL = 7
    131 _CD64_DIRECTORY_SIZE = 8
    132 _CD64_OFFSET_START_CENTDIR = 9
    133 
    134 def _check_zipfile(fp):
    135     try:
    136         if _EndRecData(fp):
    137             return True         # file has correct magic number
    138     except IOError:
    139         pass
    140     return False
    141 
    142 def is_zipfile(filename):
    143     """Quickly see if a file is a ZIP file by checking the magic number.
    144 
    145     The filename argument may be a file or file-like object too.
    146     """
    147     result = False
    148     try:
    149         if hasattr(filename, "read"):
    150             result = _check_zipfile(fp=filename)
    151         else:
    152             with open(filename, "rb") as fp:
    153                 result = _check_zipfile(fp)
    154     except IOError:
    155         pass
    156     return result
    157 
    158 def _EndRecData64(fpin, offset, endrec):
    159     """
    160     Read the ZIP64 end-of-archive records and use that to update endrec
    161     """
    162     try:
    163         fpin.seek(offset - sizeEndCentDir64Locator, 2)
    164     except IOError:
    165         # If the seek fails, the file is not large enough to contain a ZIP64
    166         # end-of-archive record, so just return the end record we were given.
    167         return endrec
    168 
    169     data = fpin.read(sizeEndCentDir64Locator)
    170     if len(data) != sizeEndCentDir64Locator:
    171         return endrec
    172     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    173     if sig != stringEndArchive64Locator:
    174         return endrec
    175 
    176     if diskno != 0 or disks != 1:
    177         raise BadZipfile("zipfiles that span multiple disks are not supported")
    178 
    179     # Assume no 'zip64 extensible data'
    180     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    181     data = fpin.read(sizeEndCentDir64)
    182     if len(data) != sizeEndCentDir64:
    183         return endrec
    184     sig, sz, create_version, read_version, disk_num, disk_dir, \
    185             dircount, dircount2, dirsize, diroffset = \
    186             struct.unpack(structEndArchive64, data)
    187     if sig != stringEndArchive64:
    188         return endrec
    189 
    190     # Update the original endrec using data from the ZIP64 record
    191     endrec[_ECD_SIGNATURE] = sig
    192     endrec[_ECD_DISK_NUMBER] = disk_num
    193     endrec[_ECD_DISK_START] = disk_dir
    194     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    195     endrec[_ECD_ENTRIES_TOTAL] = dircount2
    196     endrec[_ECD_SIZE] = dirsize
    197     endrec[_ECD_OFFSET] = diroffset
    198     return endrec
    199 
    200 
    201 def _EndRecData(fpin):
    202     """Return data from the "End of Central Directory" record, or None.
    203 
    204     The data is a list of the nine items in the ZIP "End of central dir"
    205     record followed by a tenth item, the file seek offset of this record."""
    206 
    207     # Determine file size
    208     fpin.seek(0, 2)
    209     filesize = fpin.tell()
    210 
    211     # Check to see if this is ZIP file with no archive comment (the
    212     # "end of central directory" structure should be the last item in the
    213     # file if this is the case).
    214     try:
    215         fpin.seek(-sizeEndCentDir, 2)
    216     except IOError:
    217         return None
    218     data = fpin.read()
    219     if (len(data) == sizeEndCentDir and
    220         data[0:4] == stringEndArchive and
    221         data[-2:] == b"\000\000"):
    222         # the signature is correct and there's no comment, unpack structure
    223         endrec = struct.unpack(structEndArchive, data)
    224         endrec=list(endrec)
    225 
    226         # Append a blank comment and record start offset
    227         endrec.append("")
    228         endrec.append(filesize - sizeEndCentDir)
    229 
    230         # Try to read the "Zip64 end of central directory" structure
    231         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
    232 
    233     # Either this is not a ZIP file, or it is a ZIP file with an archive
    234     # comment.  Search the end of the file for the "end of central directory"
    235     # record signature. The comment is the last item in the ZIP file and may be
    236     # up to 64K long.  It is assumed that the "end of central directory" magic
    237     # number does not appear in the comment.
    238     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    239     fpin.seek(maxCommentStart, 0)
    240     data = fpin.read()
    241     start = data.rfind(stringEndArchive)
    242     if start >= 0:
    243         # found the magic number; attempt to unpack and interpret
    244         recData = data[start:start+sizeEndCentDir]
    245         if len(recData) != sizeEndCentDir:
    246             # Zip file is corrupted.
    247             return None
    248         endrec = list(struct.unpack(structEndArchive, recData))
    249         commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
    250         comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
    251         endrec.append(comment)
    252         endrec.append(maxCommentStart + start)
    253 
    254         # Try to read the "Zip64 end of central directory" structure
    255         return _EndRecData64(fpin, maxCommentStart + start - filesize,
    256                              endrec)
    257 
    258     # Unable to find a valid end of central directory structure
    259     return None
    260 
    261 
    262 class ZipInfo (object):
    263     """Class with attributes describing each file in the ZIP archive."""
    264 
    265     __slots__ = (
    266             'orig_filename',
    267             'filename',
    268             'date_time',
    269             'compress_type',
    270             'comment',
    271             'extra',
    272             'create_system',
    273             'create_version',
    274             'extract_version',
    275             'reserved',
    276             'flag_bits',
    277             'volume',
    278             'internal_attr',
    279             'external_attr',
    280             'header_offset',
    281             'CRC',
    282             'compress_size',
    283             'file_size',
    284             '_raw_time',
    285         )
    286 
    287     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    288         self.orig_filename = filename   # Original file name in archive
    289 
    290         # Terminate the file name at the first null byte.  Null bytes in file
    291         # names are used as tricks by viruses in archives.
    292         null_byte = filename.find(chr(0))
    293         if null_byte >= 0:
    294             filename = filename[0:null_byte]
    295         # This is used to ensure paths in generated ZIP files always use
    296         # forward slashes as the directory separator, as required by the
    297         # ZIP format specification.
    298         if os.sep != "/" and os.sep in filename:
    299             filename = filename.replace(os.sep, "/")
    300 
    301         self.filename = filename        # Normalized file name
    302         self.date_time = date_time      # year, month, day, hour, min, sec
    303 
    304         if date_time[0] < 1980:
    305             raise ValueError('ZIP does not support timestamps before 1980')
    306 
    307         # Standard values:
    308         self.compress_type = ZIP_STORED # Type of compression for the file
    309         self.comment = ""               # Comment for each file
    310         self.extra = ""                 # ZIP extra data
    311         if sys.platform == 'win32':
    312             self.create_system = 0          # System which created ZIP archive
    313         else:
    314             # Assume everything else is unix-y
    315             self.create_system = 3          # System which created ZIP archive
    316         self.create_version = 20        # Version which created ZIP archive
    317         self.extract_version = 20       # Version needed to extract archive
    318         self.reserved = 0               # Must be zero
    319         self.flag_bits = 0              # ZIP flag bits
    320         self.volume = 0                 # Volume number of file header
    321         self.internal_attr = 0          # Internal attributes
    322         self.external_attr = 0          # External file attributes
    323         # Other attributes are set by class ZipFile:
    324         # header_offset         Byte offset to the file header
    325         # CRC                   CRC-32 of the uncompressed file
    326         # compress_size         Size of the compressed file
    327         # file_size             Size of the uncompressed file
    328 
    329     def FileHeader(self, zip64=None):
    330         """Return the per-file header as a string."""
    331         dt = self.date_time
    332         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    333         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    334         if self.flag_bits & 0x08:
    335             # Set these to zero because we write them after the file data
    336             CRC = compress_size = file_size = 0
    337         else:
    338             CRC = self.CRC
    339             compress_size = self.compress_size
    340             file_size = self.file_size
    341 
    342         extra = self.extra
    343 
    344         if zip64 is None:
    345             zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
    346         if zip64:
    347             fmt = '<HHQQ'
    348             extra = extra + struct.pack(fmt,
    349                     1, struct.calcsize(fmt)-4, file_size, compress_size)
    350         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
    351             if not zip64:
    352                 raise LargeZipFile("Filesize would require ZIP64 extensions")
    353             # File is larger than what fits into a 4 byte integer,
    354             # fall back to the ZIP64 extension
    355             file_size = 0xffffffff
    356             compress_size = 0xffffffff
    357             self.extract_version = max(45, self.extract_version)
    358             self.create_version = max(45, self.extract_version)
    359 
    360         filename, flag_bits = self._encodeFilenameFlags()
    361         header = struct.pack(structFileHeader, stringFileHeader,
    362                  self.extract_version, self.reserved, flag_bits,
    363                  self.compress_type, dostime, dosdate, CRC,
    364                  compress_size, file_size,
    365                  len(filename), len(extra))
    366         return header + filename + extra
    367 
    368     def _encodeFilenameFlags(self):
    369         if isinstance(self.filename, unicode):
    370             try:
    371                 return self.filename.encode('ascii'), self.flag_bits
    372             except UnicodeEncodeError:
    373                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
    374         else:
    375             return self.filename, self.flag_bits
    376 
    377     def _decodeFilename(self):
    378         if self.flag_bits & 0x800:
    379             return self.filename.decode('utf-8')
    380         else:
    381             return self.filename
    382 
    383     def _decodeExtra(self):
    384         # Try to decode the extra field.
    385         extra = self.extra
    386         unpack = struct.unpack
    387         while len(extra) >= 4:
    388             tp, ln = unpack('<HH', extra[:4])
    389             if tp == 1:
    390                 if ln >= 24:
    391                     counts = unpack('<QQQ', extra[4:28])
    392                 elif ln == 16:
    393                     counts = unpack('<QQ', extra[4:20])
    394                 elif ln == 8:
    395                     counts = unpack('<Q', extra[4:12])
    396                 elif ln == 0:
    397                     counts = ()
    398                 else:
    399                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
    400 
    401                 idx = 0
    402 
    403                 # ZIP64 extension (large files and/or large archives)
    404                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
    405                     self.file_size = counts[idx]
    406                     idx += 1
    407 
    408                 if self.compress_size == 0xFFFFFFFFL:
    409                     self.compress_size = counts[idx]
    410                     idx += 1
    411 
    412                 if self.header_offset == 0xffffffffL:
    413                     old = self.header_offset
    414                     self.header_offset = counts[idx]
    415                     idx+=1
    416 
    417             extra = extra[ln+4:]
    418 
    419 
    420 class _ZipDecrypter:
    421     """Class to handle decryption of files stored within a ZIP archive.
    422 
    423     ZIP supports a password-based form of encryption. Even though known
    424     plaintext attacks have been found against it, it is still useful
    425     to be able to get data out of such a file.
    426 
    427     Usage:
    428         zd = _ZipDecrypter(mypwd)
    429         plain_char = zd(cypher_char)
    430         plain_text = map(zd, cypher_text)
    431     """
    432 
    433     def _GenerateCRCTable():
    434         """Generate a CRC-32 table.
    435 
    436         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
    437         internal keys. We noticed that a direct implementation is faster than
    438         relying on binascii.crc32().
    439         """
    440         poly = 0xedb88320
    441         table = [0] * 256
    442         for i in range(256):
    443             crc = i
    444             for j in range(8):
    445                 if crc & 1:
    446                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
    447                 else:
    448                     crc = ((crc >> 1) & 0x7FFFFFFF)
    449             table[i] = crc
    450         return table
    451     crctable = _GenerateCRCTable()
    452 
    453     def _crc32(self, ch, crc):
    454         """Compute the CRC32 primitive on one byte."""
    455         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
    456 
    457     def __init__(self, pwd):
    458         self.key0 = 305419896
    459         self.key1 = 591751049
    460         self.key2 = 878082192
    461         for p in pwd:
    462             self._UpdateKeys(p)
    463 
    464     def _UpdateKeys(self, c):
    465         self.key0 = self._crc32(c, self.key0)
    466         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
    467         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
    468         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
    469 
    470     def __call__(self, c):
    471         """Decrypt a single character."""
    472         c = ord(c)
    473         k = self.key2 | 2
    474         c = c ^ (((k * (k^1)) >> 8) & 255)
    475         c = chr(c)
    476         self._UpdateKeys(c)
    477         return c
    478 
    479 
    480 compressor_names = {
    481     0: 'store',
    482     1: 'shrink',
    483     2: 'reduce',
    484     3: 'reduce',
    485     4: 'reduce',
    486     5: 'reduce',
    487     6: 'implode',
    488     7: 'tokenize',
    489     8: 'deflate',
    490     9: 'deflate64',
    491     10: 'implode',
    492     12: 'bzip2',
    493     14: 'lzma',
    494     18: 'terse',
    495     19: 'lz77',
    496     97: 'wavpack',
    497     98: 'ppmd',
    498 }
    499 
    500 
    501 class ZipExtFile(io.BufferedIOBase):
    502     """File-like object for reading an archive member.
    503        Is returned by ZipFile.open().
    504     """
    505 
    506     # Max size supported by decompressor.
    507     MAX_N = 1 << 31 - 1
    508 
    509     # Read from compressed files in 4k blocks.
    510     MIN_READ_SIZE = 4096
    511 
    512     # Search for universal newlines or line chunks.
    513     PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
    514 
    515     def __init__(self, fileobj, mode, zipinfo, decrypter=None,
    516             close_fileobj=False):
    517         self._fileobj = fileobj
    518         self._decrypter = decrypter
    519         self._close_fileobj = close_fileobj
    520 
    521         self._compress_type = zipinfo.compress_type
    522         self._compress_size = zipinfo.compress_size
    523         self._compress_left = zipinfo.compress_size
    524 
    525         if self._compress_type == ZIP_DEFLATED:
    526             self._decompressor = zlib.decompressobj(-15)
    527         elif self._compress_type != ZIP_STORED:
    528             descr = compressor_names.get(self._compress_type)
    529             if descr:
    530                 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
    531             else:
    532                 raise NotImplementedError("compression type %d" % (self._compress_type,))
    533         self._unconsumed = ''
    534 
    535         self._readbuffer = ''
    536         self._offset = 0
    537 
    538         self._universal = 'U' in mode
    539         self.newlines = None
    540 
    541         # Adjust read size for encrypted files since the first 12 bytes
    542         # are for the encryption/password information.
    543         if self._decrypter is not None:
    544             self._compress_left -= 12
    545 
    546         self.mode = mode
    547         self.name = zipinfo.filename
    548 
    549         if hasattr(zipinfo, 'CRC'):
    550             self._expected_crc = zipinfo.CRC
    551             self._running_crc = crc32(b'') & 0xffffffff
    552         else:
    553             self._expected_crc = None
    554 
    555     def readline(self, limit=-1):
    556         """Read and return a line from the stream.
    557 
    558         If limit is specified, at most limit bytes will be read.
    559         """
    560 
    561         if not self._universal and limit < 0:
    562             # Shortcut common case - newline found in buffer.
    563             i = self._readbuffer.find('\n', self._offset) + 1
    564             if i > 0:
    565                 line = self._readbuffer[self._offset: i]
    566                 self._offset = i
    567                 return line
    568 
    569         if not self._universal:
    570             return io.BufferedIOBase.readline(self, limit)
    571 
    572         line = ''
    573         while limit < 0 or len(line) < limit:
    574             readahead = self.peek(2)
    575             if readahead == '':
    576                 return line
    577 
    578             #
    579             # Search for universal newlines or line chunks.
    580             #
    581             # The pattern returns either a line chunk or a newline, but not
    582             # both. Combined with peek(2), we are assured that the sequence
    583             # '\r\n' is always retrieved completely and never split into
    584             # separate newlines - '\r', '\n' due to coincidental readaheads.
    585             #
    586             match = self.PATTERN.search(readahead)
    587             newline = match.group('newline')
    588             if newline is not None:
    589                 if self.newlines is None:
    590                     self.newlines = []
    591                 if newline not in self.newlines:
    592                     self.newlines.append(newline)
    593                 self._offset += len(newline)
    594                 return line + '\n'
    595 
    596             chunk = match.group('chunk')
    597             if limit >= 0:
    598                 chunk = chunk[: limit - len(line)]
    599 
    600             self._offset += len(chunk)
    601             line += chunk
    602 
    603         return line
    604 
    605     def peek(self, n=1):
    606         """Returns buffered bytes without advancing the position."""
    607         if n > len(self._readbuffer) - self._offset:
    608             chunk = self.read(n)
    609             if len(chunk) > self._offset:
    610                 self._readbuffer = chunk + self._readbuffer[self._offset:]
    611                 self._offset = 0
    612             else:
    613                 self._offset -= len(chunk)
    614 
    615         # Return up to 512 bytes to reduce allocation overhead for tight loops.
    616         return self._readbuffer[self._offset: self._offset + 512]
    617 
    618     def readable(self):
    619         return True
    620 
    621     def read(self, n=-1):
    622         """Read and return up to n bytes.
    623         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
    624         """
    625         buf = ''
    626         if n is None:
    627             n = -1
    628         while True:
    629             if n < 0:
    630                 data = self.read1(n)
    631             elif n > len(buf):
    632                 data = self.read1(n - len(buf))
    633             else:
    634                 return buf
    635             if len(data) == 0:
    636                 return buf
    637             buf += data
    638 
    639     def _update_crc(self, newdata, eof):
    640         # Update the CRC using the given data.
    641         if self._expected_crc is None:
    642             # No need to compute the CRC if we don't have a reference value
    643             return
    644         self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
    645         # Check the CRC if we're at the end of the file
    646         if eof and self._running_crc != self._expected_crc:
    647             raise BadZipfile("Bad CRC-32 for file %r" % self.name)
    648 
    649     def read1(self, n):
    650         """Read up to n bytes with at most one read() system call."""
    651 
    652         # Simplify algorithm (branching) by transforming negative n to large n.
    653         if n < 0 or n is None:
    654             n = self.MAX_N
    655 
    656         # Bytes available in read buffer.
    657         len_readbuffer = len(self._readbuffer) - self._offset
    658 
    659         # Read from file.
    660         if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
    661             nbytes = n - len_readbuffer - len(self._unconsumed)
    662             nbytes = max(nbytes, self.MIN_READ_SIZE)
    663             nbytes = min(nbytes, self._compress_left)
    664 
    665             data = self._fileobj.read(nbytes)
    666             self._compress_left -= len(data)
    667 
    668             if data and self._decrypter is not None:
    669                 data = ''.join(map(self._decrypter, data))
    670 
    671             if self._compress_type == ZIP_STORED:
    672                 self._update_crc(data, eof=(self._compress_left==0))
    673                 self._readbuffer = self._readbuffer[self._offset:] + data
    674                 self._offset = 0
    675             else:
    676                 # Prepare deflated bytes for decompression.
    677                 self._unconsumed += data
    678 
    679         # Handle unconsumed data.
    680         if (len(self._unconsumed) > 0 and n > len_readbuffer and
    681             self._compress_type == ZIP_DEFLATED):
    682             data = self._decompressor.decompress(
    683                 self._unconsumed,
    684                 max(n - len_readbuffer, self.MIN_READ_SIZE)
    685             )
    686 
    687             self._unconsumed = self._decompressor.unconsumed_tail
    688             eof = len(self._unconsumed) == 0 and self._compress_left == 0
    689             if eof:
    690                 data += self._decompressor.flush()
    691 
    692             self._update_crc(data, eof=eof)
    693             self._readbuffer = self._readbuffer[self._offset:] + data
    694             self._offset = 0
    695 
    696         # Read from buffer.
    697         data = self._readbuffer[self._offset: self._offset + n]
    698         self._offset += len(data)
    699         return data
    700 
    701     def close(self):
    702         try :
    703             if self._close_fileobj:
    704                 self._fileobj.close()
    705         finally:
    706             super(ZipExtFile, self).close()
    707 
    708 
    709 class ZipFile(object):
    710     """ Class with methods to open, read, write, close, list zip files.
    711 
    712     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
    713 
    714     file: Either the path to the file, or a file-like object.
    715           If it is a path, the file will be opened and closed by ZipFile.
    716     mode: The mode can be either read "r", write "w" or append "a".
    717     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    718     allowZip64: if True ZipFile will create files with ZIP64 extensions when
    719                 needed, otherwise it will raise an exception when this would
    720                 be necessary.
    721 
    722     """
    723 
    724     fp = None                   # Set here since __del__ checks it
    725 
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open a ZIP archive for reading ("r"), writing ("w") or appending ("a").

        file is either a path, in which case the underlying file is opened
        here and closed again by close(), or an already-open file-like
        object, which is used as-is and never closed by this class.

        Raises RuntimeError for an unknown mode, an unsupported compression
        method, or ZIP_DEFLATED without the zlib module.  Any error while
        reading an existing archive propagates after the file opened here
        (if any) has been closed.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        elif compression != ZIP_STORED:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # name -> ZipInfo lookup
        self.filelist = []      # ZipInfo instances, in archive order
        self.compression = compression  # default method for new members
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        if isinstance(file, basestring):
            # A path was given: this object owns the file it opens.
            self._filePassed = 0
            self.filename = file
            open_modes = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
            try:
                self.fp = open(file, open_modes[mode])
            except IOError:
                if mode != 'a':
                    raise
                # Appending to a file that cannot be opened for update:
                # fall back to creating a fresh archive.
                mode = key = 'w'
                self.fp = open(file, open_modes[mode])
        else:
            # A file-like object was given: the caller keeps ownership.
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # Mark as modified so the central directory is written
                # even when no member is ever added.
                self._didModify = True
                self._start_disk = self.fp.tell()
            elif key == 'a':
                try:
                    # Existing archive: position on the central directory
                    # so that new data overwrites it.
                    self._RealGetContents()
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # Not an archive yet: append after the current content.
                    self.fp.seek(0, 2)

                    # Mark as modified so the central directory is written
                    # even when no member is ever added.
                    self._didModify = True
                    self._start_disk = self.fp.tell()
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Deliberately catches everything: release the file we opened,
            # then let the original exception continue.
            handle = self.fp
            self.fp = None
            if not self._filePassed:
                handle.close()
            raise
    798 
    def __enter__(self):
        """Enter a 'with' block; the ZipFile itself is the value bound by 'as'."""
        return self
    801 
    def __exit__(self, type, value, traceback):
        """Leave a 'with' block by closing the archive.

        The exception details are ignored and nothing is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()
    804 
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record with _EndRecData(),
        then parses each central directory entry into a ZipInfo that is
        appended to self.filelist and indexed by name in self.NameToInfo.
        Also sets self._comment, self._start_disk and self.start_dir.

        Raises BadZipfile when the end record cannot be read or found, or
        when a central directory entry has a truncated fixed-size header or
        a wrong signature.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile, "File is not a zip file"
        if self.debug > 1:
            print endrec
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # self._start_disk:  Position of the start of ZIP archive
        # It is zero, unless ZIP was concatenated to another file
        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = self._start_disk + offset_cd
            print "given, inferred, offset", offset_cd, inferred, self._start_disk
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + self._start_disk
        # NOTE(review): a corrupt end record could make start_dir negative;
        # the seek below is not guarded against that -- confirm desired error.
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on 'fp' is an in-memory copy of the central directory,
        # not the archive file.
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if self.debug > 2:
                print centdir
            # The variable-length fields follow the fixed header in this
            # order: file name, extra field, comment.
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            # Fields 1..11 of the unpacked header; 't' and 'd' are the raw
            # time and date words decoded just below.
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            # The raw time word is kept because open() uses it as the
            # password check byte for some encrypted members.
            x._raw_time = t
            # Years are stored relative to 1980 and seconds in units of two.
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Make the local header offset absolute within the file.
            x.header_offset = x.header_offset + self._start_disk
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print "total", total
    873 
    874 
    875     def namelist(self):
    876         """Return a list of file names in the archive."""
    877         l = []
    878         for data in self.filelist:
    879             l.append(data.filename)
    880         return l
    881 
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.

        This is the internal self.filelist object itself, not a copy.
        """
        return self.filelist
    886 
    887     def printdir(self):
    888         """Print a table of contents for the zip file."""
    889         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
    890         for zinfo in self.filelist:
    891             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
    892             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
    893 
    def testzip(self):
        """Read all the files and check the CRC.

        Every member is opened and read to the end; the CRC check itself is
        expected to happen inside the object returned by open().  Returns
        the name of the first member for which BadZipfile is raised, or
        None when every member reads cleanly.
        """
        block = 1 << 20
        for info in self.filelist:
            try:
                # Stream in fixed-size pieces and discard them, to avoid an
                # OverflowError or MemoryError on very large members.
                with self.open(info.filename, "r") as member:
                    piece = member.read(block)
                    while piece:
                        piece = member.read(block)
            except BadZipfile:
                return info.filename
        return None
    906 
    907     def getinfo(self, name):
    908         """Return the instance of ZipInfo given 'name'."""
    909         info = self.NameToInfo.get(name)
    910         if info is None:
    911             raise KeyError(
    912                 'There is no item named %r in the archive' % name)
    913 
    914         return info
    915 
    def setpassword(self, pwd):
        """Set default password for encrypted files.

        open() falls back to this value when it is called without a
        password for an encrypted member.
        """
        self.pwd = pwd
    919 
    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        # Backed by self._comment, which starts out empty in __init__ and is
        # replaced when an existing archive is read or the property is set.
        return self._comment
    924 
    @comment.setter
    def comment(self, comment):
        """Replace the archive comment.

        A comment longer than ZIP_MAX_COMMENT is cut to that length and a
        warning is issued.  The archive is flagged as modified, which is
        what makes close() rewrite the end records.
        """
        limit = ZIP_MAX_COMMENT
        if len(comment) > limit:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % limit, stacklevel=2)
            comment = comment[:limit]
        self._comment = comment
        self._didModify = True
    935 
    936     def read(self, name, pwd=None):
    937         """Return file bytes (as a string) for name."""
    938         return self.open(name, "r", pwd).read()
    939 
    def open(self, name, mode="r", pwd=None):
        """Return a file-like object for reading the member 'name'.

        name is a member name or a ZipInfo instance.  mode must be "r",
        "U" or "rU".  pwd is the password for an encrypted member; when it
        is empty the default stored in self.pwd is tried instead.

        Raises RuntimeError for a bad mode, an already-closed archive, a
        missing password or a wrong password, and BadZipfile when the local
        file header is truncated, has a wrong signature, or names a
        different file than the central directory entry.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Reuse the file object handed to the constructor when there is
        # one; otherwise open a private handle that must be closed again.
        should_close = not self._filePassed
        if should_close:
            zef_file = open(self.filename, 'rb')
        else:
            zef_file = self.fp

        try:
            # Accept either a ready-made info object or a member name.
            if isinstance(name, ZipInfo):
                zinfo = name
            else:
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Read and validate the fixed part of the local file header.
            raw_header = zef_file.read(sizeFileHeader)
            if len(raw_header) != sizeFileHeader:
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, raw_header)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            # Consume the variable part: the name, then any extra field.
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
            if extra_length:
                zef_file.read(extra_length)

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # Bit 0 of the flags marks an encrypted member.
            zd = None
            if zinfo.flag_bits & 0x1:
                pwd = pwd or self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption
                # header used to strengthen the algorithm.  The first 11
                # bytes are completely random, while the 12th contains the
                # MSB of the CRC, or the MSB of the file time depending on
                # the header type, and is used to check the password.
                # All 12 bytes are fed through the decrypter, in order.
                crypt_header = zef_file.read(12)
                plain = [zd(c) for c in crypt_header[0:12]]
                if zinfo.flag_bits & 0x8:
                    # extended local header: compare against the file time
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # otherwise compare against the CRC
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(plain[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd,
                              close_fileobj=should_close)
        except:
            # Deliberately catches everything: do not leak the private
            # handle, then let the original exception continue.
            if should_close:
                zef_file.close()
            raise
   1018 
   1019     def extract(self, member, path=None, pwd=None):
   1020         """Extract a member from the archive to the current working directory,
   1021            using its full name. Its file information is extracted as accurately
   1022            as possible. `member' may be a filename or a ZipInfo object. You can
   1023            specify a different directory using `path'.
   1024         """
   1025         if not isinstance(member, ZipInfo):
   1026             member = self.getinfo(member)
   1027 
   1028         if path is None:
   1029             path = os.getcwd()
   1030 
   1031         return self._extract_member(member, path, pwd)
   1032 
   1033     def extractall(self, path=None, members=None, pwd=None):
   1034         """Extract all members from the archive to the current working
   1035            directory. `path' specifies a different directory to extract to.
   1036            `members' is optional and must be a subset of the list returned
   1037            by namelist().
   1038         """
   1039         if members is None:
   1040             members = self.namelist()
   1041 
   1042         for zipinfo in members:
   1043             self.extract(zipinfo, path, pwd)
   1044 
   1045     def _extract_member(self, member, targetpath, pwd):
   1046         """Extract the ZipInfo object 'member' to a physical
   1047            file on the path targetpath.
   1048         """
   1049         # build the destination pathname, replacing
   1050         # forward slashes to platform specific separators.
   1051         arcname = member.filename.replace('/', os.path.sep)
   1052 
   1053         if os.path.altsep:
   1054             arcname = arcname.replace(os.path.altsep, os.path.sep)
   1055         # interpret absolute pathname as relative, remove drive letter or
   1056         # UNC path, redundant separators, "." and ".." components.
   1057         arcname = os.path.splitdrive(arcname)[1]
   1058         arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
   1059                     if x not in ('', os.path.curdir, os.path.pardir))
   1060         if os.path.sep == '\\':
   1061             # filter illegal characters on Windows
   1062             illegal = ':<>|"?*'
   1063             if isinstance(arcname, unicode):
   1064                 table = {ord(c): ord('_') for c in illegal}
   1065             else:
   1066                 table = string.maketrans(illegal, '_' * len(illegal))
   1067             arcname = arcname.translate(table)
   1068             # remove trailing dots
   1069             arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
   1070             arcname = os.path.sep.join(x for x in arcname if x)
   1071 
   1072         targetpath = os.path.join(targetpath, arcname)
   1073         targetpath = os.path.normpath(targetpath)
   1074 
   1075         # Create all upper directories if necessary.
   1076         upperdirs = os.path.dirname(targetpath)
   1077         if upperdirs and not os.path.exists(upperdirs):
   1078             os.makedirs(upperdirs)
   1079 
   1080         if member.filename[-1] == '/':
   1081             if not os.path.isdir(targetpath):
   1082                 os.mkdir(targetpath)
   1083             return targetpath
   1084 
   1085         with self.open(member, pwd=pwd) as source, \
   1086              file(targetpath, "wb") as target:
   1087             shutil.copyfileobj(source, target)
   1088 
   1089         return targetpath
   1090 
   1091     def _writecheck(self, zinfo):
   1092         """Check for errors before writing a file to the archive."""
   1093         if zinfo.filename in self.NameToInfo:
   1094             import warnings
   1095             warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
   1096         if self.mode not in ("w", "a"):
   1097             raise RuntimeError, 'write() requires mode "w" or "a"'
   1098         if not self.fp:
   1099             raise RuntimeError, \
   1100                   "Attempt to write ZIP archive that was already closed"
   1101         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
   1102             raise RuntimeError, \
   1103                   "Compression requires the (missing) zlib module"
   1104         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
   1105             raise RuntimeError, \
   1106                   "That compression method is not supported"
   1107         if not self._allowZip64:
   1108             requires_zip64 = None
   1109             if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
   1110                 requires_zip64 = "Files count"
   1111             elif zinfo.file_size > ZIP64_LIMIT:
   1112                 requires_zip64 = "Filesize"
   1113             elif zinfo.header_offset > ZIP64_LIMIT:
   1114                 requires_zip64 = "Zipfile size"
   1115             if requires_zip64:
   1116                 raise LargeZipFile(requires_zip64 +
   1117                                    " would require ZIP64 extensions")
   1118 
    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        filename is stat()ed for its mode, size and modification time.
        arcname defaults to filename; its drive prefix and leading path
        separators are removed and a trailing '/' is added for directories.
        compress_type overrides self.compression for this member;
        directories are always stored uncompressed.

        Raises RuntimeError when the archive is already closed, or when the
        data turns out larger than ZIP64_LIMIT after a non-ZIP64 header had
        been chosen.  _writecheck() may raise RuntimeError or LargeZipFile.
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # NOTE(review): arcname[0] raises IndexError if stripping leaves an
        # empty string (e.g. filename is the root directory) -- confirm
        # whether callers can hit this.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        # st[0] is the stat mode; it is stored in the upper 16 bits.
        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
        if isdir:
            zinfo.compress_type = ZIP_STORED
        elif compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directories get a header only: no data, zero sizes and CRC.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            # Compressed size can be larger than uncompressed size
            # so the ZIP64 decision leaves a 5% margin on the stat()ed size.
            zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
            # Provisional header; it is rewritten once the real values
            # are known.
            self.fp.write(zinfo.FileHeader(zip64))
            if zinfo.compress_type == ZIP_DEFLATED:
                # Negative wbits asks zlib for a raw deflate stream.
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            file_size = 0
            # Copy in 8 KiB pieces, updating the CRC and the size counters.
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            # Emit whatever the compressor still holds back.
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # The header was written without ZIP64 fields, so the final sizes
        # must still fit.
        if not zip64 and self._allowZip64:
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        position = self.fp.tell() # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
   1210 
   1211     def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
   1212         """Write a file into the archive.  The contents is the string
   1213         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
   1214         the name of the file in the archive."""
   1215         if not isinstance(zinfo_or_arcname, ZipInfo):
   1216             zinfo = ZipInfo(filename=zinfo_or_arcname,
   1217                             date_time=time.localtime(time.time())[:6])
   1218 
   1219             zinfo.compress_type = self.compression
   1220             if zinfo.filename[-1] == '/':
   1221                 zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
   1222                 zinfo.external_attr |= 0x10           # MS-DOS directory flag
   1223             else:
   1224                 zinfo.external_attr = 0o600 << 16     # ?rw-------
   1225         else:
   1226             zinfo = zinfo_or_arcname
   1227 
   1228         if not self.fp:
   1229             raise RuntimeError(
   1230                   "Attempt to write to ZIP archive that was already closed")
   1231 
   1232         if compress_type is not None:
   1233             zinfo.compress_type = compress_type
   1234 
   1235         zinfo.file_size = len(bytes)            # Uncompressed size
   1236         zinfo.header_offset = self.fp.tell()    # Start of header bytes
   1237         self._writecheck(zinfo)
   1238         self._didModify = True
   1239         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
   1240         if zinfo.compress_type == ZIP_DEFLATED:
   1241             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1242                  zlib.DEFLATED, -15)
   1243             bytes = co.compress(bytes) + co.flush()
   1244             zinfo.compress_size = len(bytes)    # Compressed size
   1245         else:
   1246             zinfo.compress_size = zinfo.file_size
   1247         zip64 = zinfo.file_size > ZIP64_LIMIT or \
   1248                 zinfo.compress_size > ZIP64_LIMIT
   1249         if zip64 and not self._allowZip64:
   1250             raise LargeZipFile("Filesize would require ZIP64 extensions")
   1251         self.fp.write(zinfo.FileHeader(zip64))
   1252         self.fp.write(bytes)
   1253         if zinfo.flag_bits & 0x08:
   1254             # Write CRC and file sizes after the file data
   1255             fmt = '<LQQ' if zip64 else '<LLL'
   1256             self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
   1257                   zinfo.file_size))
   1258         self.fp.flush()
   1259         self.filelist.append(zinfo)
   1260         self.NameToInfo[zinfo.filename] = zinfo
   1261 
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() returns at once when self.fp is None, so this is harmless
        # after an explicit close(); the class-level 'fp = None' covers
        # instances whose __init__ never got as far as setting self.fp.
        self.close()
   1265 
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing when the archive is already closed.  When the archive
        was opened for writing or appending and has been modified, the
        central directory, the optional ZIP64 end records and the
        end-of-archive record (with the archive comment) are written first.
        Raises LargeZipFile if ZIP64 records are required but not allowed.
        In every case self.fp is reset to None, and the underlying file is
        closed only if this object opened it itself.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                pos1 = self.fp.tell()   # start of the central directory
                for zinfo in self.filelist:         # write central directory
                    # Pack the timestamp into the date and time words used
                    # by the header (years since 1980, seconds halved).
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for the fixed header are collected
                    # in 'extra' and replaced there by 0xffffffff.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    # Offsets are stored relative to the start of the
                    # archive, not of the containing file.
                    header_offset = zinfo.header_offset - self._start_disk
                    if header_offset > ZIP64_LIMIT:
                        extra.append(header_offset)
                        header_offset = 0xffffffffL

                    extra_data = zinfo.extra
                    if extra:
                        # Prepend a ZIP64 field (id 1, 8 bytes per value)
                        # to the member's existing extra data
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # A ZIP64 field raises both version fields to at
                        # least 45.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Diagnostic aid: dump the values being packed to
                        # stderr, then re-raise.  Presumably reachable only
                        # when warnings are turned into errors -- TODO confirm.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # The fixed header is followed by name, extra data and
                    # comment, in that order.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()   # end of the central directory
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1 - self._start_disk
                requires_zip64 = None
                if centDirCount > ZIP_FILECOUNT_LIMIT:
                    requires_zip64 = "Files count"
                elif centDirOffset > ZIP64_LIMIT:
                    requires_zip64 = "Central directory offset"
                elif centDirSize > ZIP64_LIMIT:
                    requires_zip64 = "Central directory size"
                if requires_zip64:
                    # Need to write the ZIP64 end-of-archive records
                    if not self._allowZip64:
                        raise LargeZipFile(requires_zip64 +
                                           " would require ZIP64 extensions")
                    # NOTE(review): the literals 44, 45, 45, 0, 0 are
                    # presumably record size, versions and disk numbers per
                    # the ZIP64 layout -- confirm against structEndArchive64.
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator carries pos2, the position at which the
                    # ZIP64 end record above was written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to what the classic end record can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Always detach the file object, even if writing failed, and
            # close it only when this object opened it.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
   1375 
   1376 
   1377 class PyZipFile(ZipFile):
   1378     """Class to create ZIP archives with Python library files and packages."""
   1379 
   1380     def writepy(self, pathname, basename = ""):
   1381         """Add all files from "pathname" to the ZIP archive.
   1382 
   1383         If pathname is a package directory, search the directory and
   1384         all package subdirectories recursively for all *.py and enter
   1385         the modules into the archive.  If pathname is a plain
   1386         directory, listdir *.py and enter all modules.  Else, pathname
   1387         must be a Python *.py file and the module will be put into the
   1388         archive.  Added modules are always module.pyo or module.pyc.
   1389         This method will compile the module.py into module.pyc if
   1390         necessary.
   1391         """
   1392         dir, name = os.path.split(pathname)
   1393         if os.path.isdir(pathname):
   1394             initname = os.path.join(pathname, "__init__.py")
   1395             if os.path.isfile(initname):
   1396                 # This is a package directory, add it
   1397                 if basename:
   1398                     basename = "%s/%s" % (basename, name)
   1399                 else:
   1400                     basename = name
   1401                 if self.debug:
   1402                     print "Adding package in", pathname, "as", basename
   1403                 fname, arcname = self._get_codename(initname[0:-3], basename)
   1404                 if self.debug:
   1405                     print "Adding", arcname
   1406                 self.write(fname, arcname)
   1407                 dirlist = os.listdir(pathname)
   1408                 dirlist.remove("__init__.py")
   1409                 # Add all *.py files and package subdirectories
   1410                 for filename in dirlist:
   1411                     path = os.path.join(pathname, filename)
   1412                     root, ext = os.path.splitext(filename)
   1413                     if os.path.isdir(path):
   1414                         if os.path.isfile(os.path.join(path, "__init__.py")):
   1415                             # This is a package directory, add it
   1416                             self.writepy(path, basename)  # Recursive call
   1417                     elif ext == ".py":
   1418                         fname, arcname = self._get_codename(path[0:-3],
   1419                                          basename)
   1420                         if self.debug:
   1421                             print "Adding", arcname
   1422                         self.write(fname, arcname)
   1423             else:
   1424                 # This is NOT a package directory, add its files at top level
   1425                 if self.debug:
   1426                     print "Adding files from directory", pathname
   1427                 for filename in os.listdir(pathname):
   1428                     path = os.path.join(pathname, filename)
   1429                     root, ext = os.path.splitext(filename)
   1430                     if ext == ".py":
   1431                         fname, arcname = self._get_codename(path[0:-3],
   1432                                          basename)
   1433                         if self.debug:
   1434                             print "Adding", arcname
   1435                         self.write(fname, arcname)
   1436         else:
   1437             if pathname[-3:] != ".py":
   1438                 raise RuntimeError, \
   1439                       'Files added with writepy() must end with ".py"'
   1440             fname, arcname = self._get_codename(pathname[0:-3], basename)
   1441             if self.debug:
   1442                 print "Adding file", arcname
   1443             self.write(fname, arcname)
   1444 
   1445     def _get_codename(self, pathname, basename):
   1446         """Return (filename, archivename) for the path.
   1447 
   1448         Given a module name path, return the correct file path and
   1449         archive name, compiling if necessary.  For example, given
   1450         /python/lib/string, return (/python/lib/string.pyc, string).
   1451         """
   1452         file_py  = pathname + ".py"
   1453         file_pyc = pathname + ".pyc"
   1454         file_pyo = pathname + ".pyo"
   1455         if os.path.isfile(file_pyo) and \
   1456                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
   1457             fname = file_pyo    # Use .pyo file
   1458         elif not os.path.isfile(file_pyc) or \
   1459              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
   1460             import py_compile
   1461             if self.debug:
   1462                 print "Compiling", file_py
   1463             try:
   1464                 py_compile.compile(file_py, file_pyc, None, True)
   1465             except py_compile.PyCompileError,err:
   1466                 print err.msg
   1467             fname = file_pyc
   1468         else:
   1469             fname = file_pyc
   1470         archivename = os.path.split(fname)[1]
   1471         if basename:
   1472             archivename = "%s/%s" % (basename, archivename)
   1473         return (fname, archivename)
   1474 
   1475 
   1476 def main(args = None):
   1477     import textwrap
   1478     USAGE=textwrap.dedent("""\
   1479         Usage:
   1480             zipfile.py -l zipfile.zip        # Show listing of a zipfile
   1481             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
   1482             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
   1483             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
   1484         """)
   1485     if args is None:
   1486         args = sys.argv[1:]
   1487 
   1488     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
   1489         print USAGE
   1490         sys.exit(1)
   1491 
   1492     if args[0] == '-l':
   1493         if len(args) != 2:
   1494             print USAGE
   1495             sys.exit(1)
   1496         with ZipFile(args[1], 'r') as zf:
   1497             zf.printdir()
   1498 
   1499     elif args[0] == '-t':
   1500         if len(args) != 2:
   1501             print USAGE
   1502             sys.exit(1)
   1503         with ZipFile(args[1], 'r') as zf:
   1504             badfile = zf.testzip()
   1505         if badfile:
   1506             print("The following enclosed file is corrupted: {!r}".format(badfile))
   1507         print "Done testing"
   1508 
   1509     elif args[0] == '-e':
   1510         if len(args) != 3:
   1511             print USAGE
   1512             sys.exit(1)
   1513 
   1514         with ZipFile(args[1], 'r') as zf:
   1515             zf.extractall(args[2])
   1516 
   1517     elif args[0] == '-c':
   1518         if len(args) < 3:
   1519             print USAGE
   1520             sys.exit(1)
   1521 
   1522         def addToZip(zf, path, zippath):
   1523             if os.path.isfile(path):
   1524                 zf.write(path, zippath, ZIP_DEFLATED)
   1525             elif os.path.isdir(path):
   1526                 if zippath:
   1527                     zf.write(path, zippath)
   1528                 for nm in os.listdir(path):
   1529                     addToZip(zf,
   1530                             os.path.join(path, nm), os.path.join(zippath, nm))
   1531             # else: ignore
   1532 
   1533         with ZipFile(args[1], 'w', allowZip64=True) as zf:
   1534             for path in args[2:]:
   1535                 zippath = os.path.basename(path)
   1536                 if not zippath:
   1537                     zippath = os.path.basename(os.path.dirname(path))
   1538                 if zippath in ('', os.curdir, os.pardir):
   1539                     zippath = ''
   1540                 addToZip(zf, path, zippath)
   1541 
# Run the command-line interface only when this file is executed as a
# script; importing the module must not trigger it.
if __name__ == "__main__":
    main()
   1544