Home | History | Annotate | Download | only in Lib
      1 """
      2 Read and write ZIP files.
      3 """
      4 import struct, os, time, sys, shutil
      5 import binascii, cStringIO, stat
      6 import io
      7 import re
      8 import string
      9 
     10 try:
     11     import zlib # We may need its compression method

     12     crc32 = zlib.crc32
     13 except ImportError:
     14     zlib = None
     15     crc32 = binascii.crc32
     16 
     17 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
     18            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
     19 
     20 class BadZipfile(Exception):
     21     pass
     22 
     23 
     24 class LargeZipFile(Exception):
     25     """
     26     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
     27     and those extensions are disabled.
     28     """
     29 
error = BadZipfile      # Alias: the exception raised by this module


# (1 << 31) - 1.  ZipInfo.FileHeader compares file_size / compress_size
# against this value to decide whether the ZIP64 extra field is required.
ZIP64_LIMIT = (1 << 31) - 1
# Both limits below equal 65535, the largest value an unsigned 16-bit
# field can hold.  NOTE(review): presumably they bound the entry count and
# archive comment length; their use is outside this part of the file.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# Constants for ZIP file compression methods.
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods are not supported.

# Below are some formats and associated data for reading/writing headers
# using the struct module.  The names and structures of headers/records are
# those used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Every format string starts with "<" (little-endian, standard sizes), and
# each group of _XXX_* names lists the tuple index of the corresponding
# field in the result of struct.unpack() for that format.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document).  The format unpacks to 8 fields.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec; _EndRecData appends the comment and the record's file offset to the
# unpacked list as a convenience for the rest of the module.
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document).
# The format unpacks to 19 fields.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Indexes of entries in the central directory structure.
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document).  The format unpacks to 12 fields;
# ZipInfo.FileHeader packs its header with it.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and
# size.  _EndRecData64 unpacks it as (signature, disk number, relative
# offset, number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and
# indices (section V.G in the format document).  The format unpacks to 10
# fields.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
    133 
    134 def _check_zipfile(fp):
    135     try:
    136         if _EndRecData(fp):
    137             return True         # file has correct magic number

    138     except IOError:
    139         pass
    140     return False
    141 
    142 def is_zipfile(filename):
    143     """Quickly see if a file is a ZIP file by checking the magic number.
    144 
    145     The filename argument may be a file or file-like object too.
    146     """
    147     result = False
    148     try:
    149         if hasattr(filename, "read"):
    150             result = _check_zipfile(fp=filename)
    151         else:
    152             with open(filename, "rb") as fp:
    153                 result = _check_zipfile(fp)
    154     except IOError:
    155         pass
    156     return result
    157 
    158 def _EndRecData64(fpin, offset, endrec):
    159     """
    160     Read the ZIP64 end-of-archive records and use that to update endrec
    161     """
    162     try:
    163         fpin.seek(offset - sizeEndCentDir64Locator, 2)
    164     except IOError:
    165         # If the seek fails, the file is not large enough to contain a ZIP64

    166         # end-of-archive record, so just return the end record we were given.

    167         return endrec
    168 
    169     data = fpin.read(sizeEndCentDir64Locator)
    170     if len(data) != sizeEndCentDir64Locator:
    171         return endrec
    172     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    173     if sig != stringEndArchive64Locator:
    174         return endrec
    175 
    176     if diskno != 0 or disks != 1:
    177         raise BadZipfile("zipfiles that span multiple disks are not supported")
    178 
    179     # Assume no 'zip64 extensible data'

    180     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    181     data = fpin.read(sizeEndCentDir64)
    182     if len(data) != sizeEndCentDir64:
    183         return endrec
    184     sig, sz, create_version, read_version, disk_num, disk_dir, \
    185             dircount, dircount2, dirsize, diroffset = \
    186             struct.unpack(structEndArchive64, data)
    187     if sig != stringEndArchive64:
    188         return endrec
    189 
    190     # Update the original endrec using data from the ZIP64 record

    191     endrec[_ECD_SIGNATURE] = sig
    192     endrec[_ECD_DISK_NUMBER] = disk_num
    193     endrec[_ECD_DISK_START] = disk_dir
    194     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    195     endrec[_ECD_ENTRIES_TOTAL] = dircount2
    196     endrec[_ECD_SIZE] = dirsize
    197     endrec[_ECD_OFFSET] = diroffset
    198     return endrec
    199 
    200 
    201 def _EndRecData(fpin):
    202     """Return data from the "End of Central Directory" record, or None.
    203 
    204     The data is a list of the nine items in the ZIP "End of central dir"
    205     record followed by a tenth item, the file seek offset of this record."""
    206 
    207     # Determine file size

    208     fpin.seek(0, 2)
    209     filesize = fpin.tell()
    210 
    211     # Check to see if this is ZIP file with no archive comment (the

    212     # "end of central directory" structure should be the last item in the

    213     # file if this is the case).

    214     try:
    215         fpin.seek(-sizeEndCentDir, 2)
    216     except IOError:
    217         return None
    218     data = fpin.read()
    219     if (len(data) == sizeEndCentDir and
    220         data[0:4] == stringEndArchive and
    221         data[-2:] == b"\000\000"):
    222         # the signature is correct and there's no comment, unpack structure

    223         endrec = struct.unpack(structEndArchive, data)
    224         endrec=list(endrec)
    225 
    226         # Append a blank comment and record start offset

    227         endrec.append("")
    228         endrec.append(filesize - sizeEndCentDir)
    229 
    230         # Try to read the "Zip64 end of central directory" structure

    231         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
    232 
    233     # Either this is not a ZIP file, or it is a ZIP file with an archive

    234     # comment.  Search the end of the file for the "end of central directory"

    235     # record signature. The comment is the last item in the ZIP file and may be

    236     # up to 64K long.  It is assumed that the "end of central directory" magic

    237     # number does not appear in the comment.

    238     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    239     fpin.seek(maxCommentStart, 0)
    240     data = fpin.read()
    241     start = data.rfind(stringEndArchive)
    242     if start >= 0:
    243         # found the magic number; attempt to unpack and interpret

    244         recData = data[start:start+sizeEndCentDir]
    245         if len(recData) != sizeEndCentDir:
    246             # Zip file is corrupted.

    247             return None
    248         endrec = list(struct.unpack(structEndArchive, recData))
    249         commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file

    250         comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
    251         endrec.append(comment)
    252         endrec.append(maxCommentStart + start)
    253 
    254         # Try to read the "Zip64 end of central directory" structure

    255         return _EndRecData64(fpin, maxCommentStart + start - filesize,
    256                              endrec)
    257 
    258     # Unable to find a valid end of central directory structure

    259     return None
    260 
    261 
    262 class ZipInfo (object):
    263     """Class with attributes describing each file in the ZIP archive."""
    264 
    265     __slots__ = (
    266             'orig_filename',
    267             'filename',
    268             'date_time',
    269             'compress_type',
    270             'comment',
    271             'extra',
    272             'create_system',
    273             'create_version',
    274             'extract_version',
    275             'reserved',
    276             'flag_bits',
    277             'volume',
    278             'internal_attr',
    279             'external_attr',
    280             'header_offset',
    281             'CRC',
    282             'compress_size',
    283             'file_size',
    284             '_raw_time',
    285         )
    286 
    287     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    288         self.orig_filename = filename   # Original file name in archive

    289 
    290         # Terminate the file name at the first null byte.  Null bytes in file

    291         # names are used as tricks by viruses in archives.

    292         null_byte = filename.find(chr(0))
    293         if null_byte >= 0:
    294             filename = filename[0:null_byte]
    295         # This is used to ensure paths in generated ZIP files always use

    296         # forward slashes as the directory separator, as required by the

    297         # ZIP format specification.

    298         if os.sep != "/" and os.sep in filename:
    299             filename = filename.replace(os.sep, "/")
    300 
    301         self.filename = filename        # Normalized file name

    302         self.date_time = date_time      # year, month, day, hour, min, sec

    303 
    304         if date_time[0] < 1980:
    305             raise ValueError('ZIP does not support timestamps before 1980')
    306 
    307         # Standard values:

    308         self.compress_type = ZIP_STORED # Type of compression for the file

    309         self.comment = ""               # Comment for each file

    310         self.extra = ""                 # ZIP extra data

    311         if sys.platform == 'win32':
    312             self.create_system = 0          # System which created ZIP archive

    313         else:
    314             # Assume everything else is unix-y

    315             self.create_system = 3          # System which created ZIP archive

    316         self.create_version = 20        # Version which created ZIP archive

    317         self.extract_version = 20       # Version needed to extract archive

    318         self.reserved = 0               # Must be zero

    319         self.flag_bits = 0              # ZIP flag bits

    320         self.volume = 0                 # Volume number of file header

    321         self.internal_attr = 0          # Internal attributes

    322         self.external_attr = 0          # External file attributes

    323         # Other attributes are set by class ZipFile:

    324         # header_offset         Byte offset to the file header

    325         # CRC                   CRC-32 of the uncompressed file

    326         # compress_size         Size of the compressed file

    327         # file_size             Size of the uncompressed file

    328 
    329     def FileHeader(self, zip64=None):
    330         """Return the per-file header as a string."""
    331         dt = self.date_time
    332         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    333         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    334         if self.flag_bits & 0x08:
    335             # Set these to zero because we write them after the file data

    336             CRC = compress_size = file_size = 0
    337         else:
    338             CRC = self.CRC
    339             compress_size = self.compress_size
    340             file_size = self.file_size
    341 
    342         extra = self.extra
    343 
    344         if zip64 is None:
    345             zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
    346         if zip64:
    347             fmt = '<HHQQ'
    348             extra = extra + struct.pack(fmt,
    349                     1, struct.calcsize(fmt)-4, file_size, compress_size)
    350         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
    351             if not zip64:
    352                 raise LargeZipFile("Filesize would require ZIP64 extensions")
    353             # File is larger than what fits into a 4 byte integer,

    354             # fall back to the ZIP64 extension

    355             file_size = 0xffffffff
    356             compress_size = 0xffffffff
    357             self.extract_version = max(45, self.extract_version)
    358             self.create_version = max(45, self.extract_version)
    359 
    360         filename, flag_bits = self._encodeFilenameFlags()
    361         header = struct.pack(structFileHeader, stringFileHeader,
    362                  self.extract_version, self.reserved, flag_bits,
    363                  self.compress_type, dostime, dosdate, CRC,
    364                  compress_size, file_size,
    365                  len(filename), len(extra))
    366         return header + filename + extra
    367 
    368     def _encodeFilenameFlags(self):
    369         if isinstance(self.filename, unicode):
    370             try:
    371                 return self.filename.encode('ascii'), self.flag_bits
    372             except UnicodeEncodeError:
    373                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
    374         else:
    375             return self.filename, self.flag_bits
    376 
    377     def _decodeFilename(self):
    378         if self.flag_bits & 0x800:
    379             return self.filename.decode('utf-8')
    380         else:
    381             return self.filename
    382 
    383     def _decodeExtra(self):
    384         # Try to decode the extra field.

    385         extra = self.extra
    386         unpack = struct.unpack
    387         while len(extra) >= 4:
    388             tp, ln = unpack('<HH', extra[:4])
    389             if tp == 1:
    390                 if ln >= 24:
    391                     counts = unpack('<QQQ', extra[4:28])
    392                 elif ln == 16:
    393                     counts = unpack('<QQ', extra[4:20])
    394                 elif ln == 8:
    395                     counts = unpack('<Q', extra[4:12])
    396                 elif ln == 0:
    397                     counts = ()
    398                 else:
    399                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
    400 
    401                 idx = 0
    402 
    403                 # ZIP64 extension (large files and/or large archives)

    404                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
    405                     self.file_size = counts[idx]
    406                     idx += 1
    407 
    408                 if self.compress_size == 0xFFFFFFFFL:
    409                     self.compress_size = counts[idx]
    410                     idx += 1
    411 
    412                 if self.header_offset == 0xffffffffL:
    413                     old = self.header_offset
    414                     self.header_offset = counts[idx]
    415                     idx+=1
    416 
    417             extra = extra[ln+4:]
    418 
    419 
    420 class _ZipDecrypter:
    421     """Class to handle decryption of files stored within a ZIP archive.
    422 
    423     ZIP supports a password-based form of encryption. Even though known
    424     plaintext attacks have been found against it, it is still useful
    425     to be able to get data out of such a file.
    426 
    427     Usage:
    428         zd = _ZipDecrypter(mypwd)
    429         plain_char = zd(cypher_char)
    430         plain_text = map(zd, cypher_text)
    431     """
    432 
    433     def _GenerateCRCTable():
    434         """Generate a CRC-32 table.
    435 
    436         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
    437         internal keys. We noticed that a direct implementation is faster than
    438         relying on binascii.crc32().
    439         """
    440         poly = 0xedb88320
    441         table = [0] * 256
    442         for i in range(256):
    443             crc = i
    444             for j in range(8):
    445                 if crc & 1:
    446                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
    447                 else:
    448                     crc = ((crc >> 1) & 0x7FFFFFFF)
    449             table[i] = crc
    450         return table
    451     crctable = _GenerateCRCTable()
    452 
    453     def _crc32(self, ch, crc):
    454         """Compute the CRC32 primitive on one byte."""
    455         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
    456 
    457     def __init__(self, pwd):
    458         self.key0 = 305419896
    459         self.key1 = 591751049
    460         self.key2 = 878082192
    461         for p in pwd:
    462             self._UpdateKeys(p)
    463 
    464     def _UpdateKeys(self, c):
    465         self.key0 = self._crc32(c, self.key0)
    466         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
    467         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
    468         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
    469 
    470     def __call__(self, c):
    471         """Decrypt a single character."""
    472         c = ord(c)
    473         k = self.key2 | 2
    474         c = c ^ (((k * (k^1)) >> 8) & 255)
    475         c = chr(c)
    476         self._UpdateKeys(c)
    477         return c
    478 
    479 
# Human-readable names for ZIP compression method ids.  ZipExtFile.__init__
# looks a method up here to build the NotImplementedError message for
# methods other than ZIP_STORED and ZIP_DEFLATED.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
    499 
    500 
    501 class ZipExtFile(io.BufferedIOBase):
    502     """File-like object for reading an archive member.
    503        Is returned by ZipFile.open().
    504     """
    505 
    506     # Max size supported by decompressor.

    507     MAX_N = 1 << 31 - 1
    508 
    509     # Read from compressed files in 4k blocks.

    510     MIN_READ_SIZE = 4096
    511 
    512     # Search for universal newlines or line chunks.

    513     PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
    514 
    515     def __init__(self, fileobj, mode, zipinfo, decrypter=None,
    516             close_fileobj=False):
    517         self._fileobj = fileobj
    518         self._decrypter = decrypter
    519         self._close_fileobj = close_fileobj
    520 
    521         self._compress_type = zipinfo.compress_type
    522         self._compress_size = zipinfo.compress_size
    523         self._compress_left = zipinfo.compress_size
    524 
    525         if self._compress_type == ZIP_DEFLATED:
    526             self._decompressor = zlib.decompressobj(-15)
    527         elif self._compress_type != ZIP_STORED:
    528             descr = compressor_names.get(self._compress_type)
    529             if descr:
    530                 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
    531             else:
    532                 raise NotImplementedError("compression type %d" % (self._compress_type,))
    533         self._unconsumed = ''
    534 
    535         self._readbuffer = ''
    536         self._offset = 0
    537 
    538         self._universal = 'U' in mode
    539         self.newlines = None
    540 
    541         # Adjust read size for encrypted files since the first 12 bytes

    542         # are for the encryption/password information.

    543         if self._decrypter is not None:
    544             self._compress_left -= 12
    545 
    546         self.mode = mode
    547         self.name = zipinfo.filename
    548 
    549         if hasattr(zipinfo, 'CRC'):
    550             self._expected_crc = zipinfo.CRC
    551             self._running_crc = crc32(b'') & 0xffffffff
    552         else:
    553             self._expected_crc = None
    554 
    555     def readline(self, limit=-1):
    556         """Read and return a line from the stream.
    557 
    558         If limit is specified, at most limit bytes will be read.
    559         """
    560 
    561         if not self._universal and limit < 0:
    562             # Shortcut common case - newline found in buffer.

    563             i = self._readbuffer.find('\n', self._offset) + 1
    564             if i > 0:
    565                 line = self._readbuffer[self._offset: i]
    566                 self._offset = i
    567                 return line
    568 
    569         if not self._universal:
    570             return io.BufferedIOBase.readline(self, limit)
    571 
    572         line = ''
    573         while limit < 0 or len(line) < limit:
    574             readahead = self.peek(2)
    575             if readahead == '':
    576                 return line
    577 
    578             #

    579             # Search for universal newlines or line chunks.

    580             #

    581             # The pattern returns either a line chunk or a newline, but not

    582             # both. Combined with peek(2), we are assured that the sequence

    583             # '\r\n' is always retrieved completely and never split into

    584             # separate newlines - '\r', '\n' due to coincidental readaheads.

    585             #

    586             match = self.PATTERN.search(readahead)
    587             newline = match.group('newline')
    588             if newline is not None:
    589                 if self.newlines is None:
    590                     self.newlines = []
    591                 if newline not in self.newlines:
    592                     self.newlines.append(newline)
    593                 self._offset += len(newline)
    594                 return line + '\n'
    595 
    596             chunk = match.group('chunk')
    597             if limit >= 0:
    598                 chunk = chunk[: limit - len(line)]
    599 
    600             self._offset += len(chunk)
    601             line += chunk
    602 
    603         return line
    604 
    605     def peek(self, n=1):
    606         """Returns buffered bytes without advancing the position."""
    607         if n > len(self._readbuffer) - self._offset:
    608             chunk = self.read(n)
    609             if len(chunk) > self._offset:
    610                 self._readbuffer = chunk + self._readbuffer[self._offset:]
    611                 self._offset = 0
    612             else:
    613                 self._offset -= len(chunk)
    614 
    615         # Return up to 512 bytes to reduce allocation overhead for tight loops.

    616         return self._readbuffer[self._offset: self._offset + 512]
    617 
    618     def readable(self):
    619         return True
    620 
    621     def read(self, n=-1):
    622         """Read and return up to n bytes.
    623         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
    624         """
    625         buf = ''
    626         if n is None:
    627             n = -1
    628         while True:
    629             if n < 0:
    630                 data = self.read1(n)
    631             elif n > len(buf):
    632                 data = self.read1(n - len(buf))
    633             else:
    634                 return buf
    635             if len(data) == 0:
    636                 return buf
    637             buf += data
    638 
    639     def _update_crc(self, newdata, eof):
    640         # Update the CRC using the given data.

    641         if self._expected_crc is None:
    642             # No need to compute the CRC if we don't have a reference value

    643             return
    644         self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
    645         # Check the CRC if we're at the end of the file

    646         if eof and self._running_crc != self._expected_crc:
    647             raise BadZipfile("Bad CRC-32 for file %r" % self.name)
    648 
    649     def read1(self, n):
    650         """Read up to n bytes with at most one read() system call."""
    651 
    652         # Simplify algorithm (branching) by transforming negative n to large n.

    653         if n < 0 or n is None:
    654             n = self.MAX_N
    655 
    656         # Bytes available in read buffer.

    657         len_readbuffer = len(self._readbuffer) - self._offset
    658 
    659         # Read from file.

    660         if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
    661             nbytes = n - len_readbuffer - len(self._unconsumed)
    662             nbytes = max(nbytes, self.MIN_READ_SIZE)
    663             nbytes = min(nbytes, self._compress_left)
    664 
    665             data = self._fileobj.read(nbytes)
    666             self._compress_left -= len(data)
    667 
    668             if data and self._decrypter is not None:
    669                 data = ''.join(map(self._decrypter, data))
    670 
    671             if self._compress_type == ZIP_STORED:
    672                 self._update_crc(data, eof=(self._compress_left==0))
    673                 self._readbuffer = self._readbuffer[self._offset:] + data
    674                 self._offset = 0
    675             else:
    676                 # Prepare deflated bytes for decompression.

    677                 self._unconsumed += data
    678 
    679         # Handle unconsumed data.

    680         if (len(self._unconsumed) > 0 and n > len_readbuffer and
    681             self._compress_type == ZIP_DEFLATED):
    682             data = self._decompressor.decompress(
    683                 self._unconsumed,
    684                 max(n - len_readbuffer, self.MIN_READ_SIZE)
    685             )
    686 
    687             self._unconsumed = self._decompressor.unconsumed_tail
    688             eof = len(self._unconsumed) == 0 and self._compress_left == 0
    689             if eof:
    690                 data += self._decompressor.flush()
    691 
    692             self._update_crc(data, eof=eof)
    693             self._readbuffer = self._readbuffer[self._offset:] + data
    694             self._offset = 0
    695 
    696         # Read from buffer.

    697         data = self._readbuffer[self._offset: self._offset + n]
    698         self._offset += len(data)
    699         return data
    700 
    701     def close(self):
    702         try :
    703             if self._close_fileobj:
    704                 self._fileobj.close()
    705         finally:
    706             super(ZipExtFile, self).close()
    707 
    708 
    709 class ZipFile(object):
    710     """ Class with methods to open, read, write, close, list zip files.
    711 
    712     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
    713 
    714     file: Either the path to the file, or a file-like object.
    715           If it is a path, the file will be opened and closed by ZipFile.
    716     mode: The mode can be either read "r", write "w" or append "a".
    717     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    718     allowZip64: if True ZipFile will create files with ZIP64 extensions when
    719                 needed, otherwise it will raise an exception when this would
    720                 be necessary.
    721 
    722     """
    723 
    724     fp = None                   # Set here since __del__ checks it

    725 
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open a ZIP archive for reading ("r"), writing ("w") or appending ("a").

        'file' is either a path, in which case the file is opened here and
        closed by this object, or a file-like object, which is used as-is and
        never closed by this object.

        Raises RuntimeError for an unknown mode or an unusable compression
        method; errors raised while reading an existing archive propagate
        after the file opened here (if any) has been closed.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        elif compression != ZIP_STORED:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0                  # Level of printing: 0 through 3
        self.NameToInfo = {}            # Maps member name -> ZipInfo
        self.filelist = []              # ZipInfo instances, in archive order
        self.compression = compression  # Default compression method
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        if isinstance(file, basestring):
            # A path was given: this object owns the underlying file.
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode != 'a':
                    raise
                # Appending to a file that cannot be opened for update:
                # fall back to creating it.  self.mode stays "a".
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
        else:
            # A file-like object was given: the caller keeps ownership.
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # Mark as modified so that the central directory is written
                # even when no member is ever added.
                self._didModify = True
            elif key == 'a':
                try:
                    # Existing archive: position at the central directory so
                    # that new members overwrite it.
                    self._RealGetContents()
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # Not an archive: append after the existing content and
                    # make sure a central directory gets written on close.
                    self.fp.seek(0, 2)
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Release the file we opened before propagating any error.
            handle = self.fp
            self.fp = None
            if not self._filePassed:
                handle.close()
            raise
    796 
    797     def __enter__(self):
    798         return self
    799 
    800     def __exit__(self, type, value, traceback):
    801         self.close()
    802 
    803     def _RealGetContents(self):
    804         """Read in the table of contents for the ZIP file."""
    805         fp = self.fp
    806         try:
    807             endrec = _EndRecData(fp)
    808         except IOError:
    809             raise BadZipfile("File is not a zip file")
    810         if not endrec:
    811             raise BadZipfile, "File is not a zip file"
    812         if self.debug > 1:
    813             print endrec
    814         size_cd = endrec[_ECD_SIZE]             # bytes in central directory

    815         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory

    816         self._comment = endrec[_ECD_COMMENT]    # archive comment

    817 
    818         # "concat" is zero, unless zip was concatenated to another file

    819         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    820         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
    821             # If Zip64 extension structures are present, account for them

    822             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    823 
    824         if self.debug > 2:
    825             inferred = concat + offset_cd
    826             print "given, inferred, offset", offset_cd, inferred, concat
    827         # self.start_dir:  Position of start of central directory

    828         self.start_dir = offset_cd + concat
    829         fp.seek(self.start_dir, 0)
    830         data = fp.read(size_cd)
    831         fp = cStringIO.StringIO(data)
    832         total = 0
    833         while total < size_cd:
    834             centdir = fp.read(sizeCentralDir)
    835             if len(centdir) != sizeCentralDir:
    836                 raise BadZipfile("Truncated central directory")
    837             centdir = struct.unpack(structCentralDir, centdir)
    838             if centdir[_CD_SIGNATURE] != stringCentralDir:
    839                 raise BadZipfile("Bad magic number for central directory")
    840             if self.debug > 2:
    841                 print centdir
    842             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
    843             # Create ZipInfo instance to store file information

    844             x = ZipInfo(filename)
    845             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
    846             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
    847             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
    848             (x.create_version, x.create_system, x.extract_version, x.reserved,
    849                 x.flag_bits, x.compress_type, t, d,
    850                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
    851             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
    852             # Convert date/time code to (year, month, day, hour, min, sec)

    853             x._raw_time = t
    854             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
    855                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
    856 
    857             x._decodeExtra()
    858             x.header_offset = x.header_offset + concat
    859             x.filename = x._decodeFilename()
    860             self.filelist.append(x)
    861             self.NameToInfo[x.filename] = x
    862 
    863             # update total bytes read from central directory

    864             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
    865                      + centdir[_CD_EXTRA_FIELD_LENGTH]
    866                      + centdir[_CD_COMMENT_LENGTH])
    867 
    868             if self.debug > 2:
    869                 print "total", total
    870 
    871 
    872     def namelist(self):
    873         """Return a list of file names in the archive."""
    874         l = []
    875         for data in self.filelist:
    876             l.append(data.filename)
    877         return l
    878 
    879     def infolist(self):
    880         """Return a list of class ZipInfo instances for files in the
    881         archive."""
    882         return self.filelist
    883 
    884     def printdir(self):
    885         """Print a table of contents for the zip file."""
    886         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
    887         for zinfo in self.filelist:
    888             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
    889             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
    890 
    891     def testzip(self):
    892         """Read all the files and check the CRC."""
    893         chunk_size = 2 ** 20
    894         for zinfo in self.filelist:
    895             try:
    896                 # Read by chunks, to avoid an OverflowError or a

    897                 # MemoryError with very large embedded files.

    898                 with self.open(zinfo.filename, "r") as f:
    899                     while f.read(chunk_size):     # Check CRC-32

    900                         pass
    901             except BadZipfile:
    902                 return zinfo.filename
    903 
    904     def getinfo(self, name):
    905         """Return the instance of ZipInfo given 'name'."""
    906         info = self.NameToInfo.get(name)
    907         if info is None:
    908             raise KeyError(
    909                 'There is no item named %r in the archive' % name)
    910 
    911         return info
    912 
    913     def setpassword(self, pwd):
    914         """Set default password for encrypted files."""
    915         self.pwd = pwd
    916 
    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # A comment longer than ZIP_MAX_COMMENT is truncated, with a
        # warning attributed to the caller, rather than rejected.
        limit = ZIP_MAX_COMMENT
        if len(comment) > limit:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % limit, stacklevel=2)
            comment = comment[:limit]
        self._comment = comment
        # The end record must be rewritten on close() to store the comment.
        self._didModify = True
    932 
    933     def read(self, name, pwd=None):
    934         """Return file bytes (as a string) for name."""
    935         return self.open(name, "r", pwd).read()
    936 
    937     def open(self, name, mode="r", pwd=None):
    938         """Return file-like object for 'name'."""
    939         if mode not in ("r", "U", "rU"):
    940             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
    941         if not self.fp:
    942             raise RuntimeError, \
    943                   "Attempt to read ZIP archive that was already closed"
    944 
    945         # Only open a new file for instances where we were not

    946         # given a file object in the constructor

    947         if self._filePassed:
    948             zef_file = self.fp
    949             should_close = False
    950         else:
    951             zef_file = open(self.filename, 'rb')
    952             should_close = True
    953 
    954         try:
    955             # Make sure we have an info object

    956             if isinstance(name, ZipInfo):
    957                 # 'name' is already an info object

    958                 zinfo = name
    959             else:
    960                 # Get info object for name

    961                 zinfo = self.getinfo(name)
    962 
    963             zef_file.seek(zinfo.header_offset, 0)
    964 
    965             # Skip the file header:

    966             fheader = zef_file.read(sizeFileHeader)
    967             if len(fheader) != sizeFileHeader:
    968                 raise BadZipfile("Truncated file header")
    969             fheader = struct.unpack(structFileHeader, fheader)
    970             if fheader[_FH_SIGNATURE] != stringFileHeader:
    971                 raise BadZipfile("Bad magic number for file header")
    972 
    973             fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
    974             if fheader[_FH_EXTRA_FIELD_LENGTH]:
    975                 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
    976 
    977             if fname != zinfo.orig_filename:
    978                 raise BadZipfile, \
    979                         'File name in directory "%s" and header "%s" differ.' % (
    980                             zinfo.orig_filename, fname)
    981 
    982             # check for encrypted flag & handle password

    983             is_encrypted = zinfo.flag_bits & 0x1
    984             zd = None
    985             if is_encrypted:
    986                 if not pwd:
    987                     pwd = self.pwd
    988                 if not pwd:
    989                     raise RuntimeError, "File %s is encrypted, " \
    990                         "password required for extraction" % name
    991 
    992                 zd = _ZipDecrypter(pwd)
    993                 # The first 12 bytes in the cypher stream is an encryption header

    994                 #  used to strengthen the algorithm. The first 11 bytes are

    995                 #  completely random, while the 12th contains the MSB of the CRC,

    996                 #  or the MSB of the file time depending on the header type

    997                 #  and is used to check the correctness of the password.

    998                 bytes = zef_file.read(12)
    999                 h = map(zd, bytes[0:12])
   1000                 if zinfo.flag_bits & 0x8:
   1001                     # compare against the file type from extended local headers

   1002                     check_byte = (zinfo._raw_time >> 8) & 0xff
   1003                 else:
   1004                     # compare against the CRC otherwise

   1005                     check_byte = (zinfo.CRC >> 24) & 0xff
   1006                 if ord(h[11]) != check_byte:
   1007                     raise RuntimeError("Bad password for file", name)
   1008 
   1009             return ZipExtFile(zef_file, mode, zinfo, zd,
   1010                     close_fileobj=should_close)
   1011         except:
   1012             if should_close:
   1013                 zef_file.close()
   1014             raise
   1015 
   1016     def extract(self, member, path=None, pwd=None):
   1017         """Extract a member from the archive to the current working directory,
   1018            using its full name. Its file information is extracted as accurately
   1019            as possible. `member' may be a filename or a ZipInfo object. You can
   1020            specify a different directory using `path'.
   1021         """
   1022         if not isinstance(member, ZipInfo):
   1023             member = self.getinfo(member)
   1024 
   1025         if path is None:
   1026             path = os.getcwd()
   1027 
   1028         return self._extract_member(member, path, pwd)
   1029 
   1030     def extractall(self, path=None, members=None, pwd=None):
   1031         """Extract all members from the archive to the current working
   1032            directory. `path' specifies a different directory to extract to.
   1033            `members' is optional and must be a subset of the list returned
   1034            by namelist().
   1035         """
   1036         if members is None:
   1037             members = self.namelist()
   1038 
   1039         for zipinfo in members:
   1040             self.extract(zipinfo, path, pwd)
   1041 
   1042     def _extract_member(self, member, targetpath, pwd):
   1043         """Extract the ZipInfo object 'member' to a physical
   1044            file on the path targetpath.
   1045         """
   1046         # build the destination pathname, replacing
   1047         # forward slashes to platform specific separators.
   1048         arcname = member.filename.replace('/', os.path.sep)
   1049 
   1050         if os.path.altsep:
   1051             arcname = arcname.replace(os.path.altsep, os.path.sep)
   1052         # interpret absolute pathname as relative, remove drive letter or
   1053         # UNC path, redundant separators, "." and ".." components.
   1054         arcname = os.path.splitdrive(arcname)[1]
   1055         arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
   1056                     if x not in ('', os.path.curdir, os.path.pardir))
   1057         if os.path.sep == '\\':
   1058             # filter illegal characters on Windows
   1059             illegal = ':<>|"?*'
   1060             if isinstance(arcname, unicode):
   1061                 table = {ord(c): ord('_') for c in illegal}
   1062             else:
   1063                 table = string.maketrans(illegal, '_' * len(illegal))
   1064             arcname = arcname.translate(table)
   1065             # remove trailing dots
   1066             arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
   1067             arcname = os.path.sep.join(x for x in arcname if x)
   1068 
   1069         targetpath = os.path.join(targetpath, arcname)
   1070         targetpath = os.path.normpath(targetpath)
   1071 
   1072         # Create all upper directories if necessary.
   1073         upperdirs = os.path.dirname(targetpath)
   1074         if upperdirs and not os.path.exists(upperdirs):
   1075             os.makedirs(upperdirs)
   1076 
   1077         if member.filename[-1] == '/':
   1078             if not os.path.isdir(targetpath):
   1079                 os.mkdir(targetpath)
   1080             return targetpath
   1081 
   1082         with self.open(member, pwd=pwd) as source, \
   1083              file(targetpath, "wb") as target:
   1084             shutil.copyfileobj(source, target)
   1085 
   1086         return targetpath
   1087 
   1088     def _writecheck(self, zinfo):
   1089         """Check for errors before writing a file to the archive."""
   1090         if zinfo.filename in self.NameToInfo:
   1091             import warnings
   1092             warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
   1093         if self.mode not in ("w", "a"):
   1094             raise RuntimeError, 'write() requires mode "w" or "a"'
   1095         if not self.fp:
   1096             raise RuntimeError, \
   1097                   "Attempt to write ZIP archive that was already closed"
   1098         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
   1099             raise RuntimeError, \
   1100                   "Compression requires the (missing) zlib module"
   1101         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
   1102             raise RuntimeError, \
   1103                   "That compression method is not supported"
   1104         if not self._allowZip64:
   1105             requires_zip64 = None
   1106             if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
   1107                 requires_zip64 = "Files count"
   1108             elif zinfo.file_size > ZIP64_LIMIT:
   1109                 requires_zip64 = "Filesize"
   1110             elif zinfo.header_offset > ZIP64_LIMIT:
   1111                 requires_zip64 = "Zipfile size"
   1112             if requires_zip64:
   1113                 raise LargeZipFile(requires_zip64 +
   1114                                    " would require ZIP64 extensions")
   1115 
   1116     def write(self, filename, arcname=None, compress_type=None):
   1117         """Put the bytes from filename into the archive under the name
   1118         arcname."""
   1119         if not self.fp:
   1120             raise RuntimeError(
   1121                   "Attempt to write to ZIP archive that was already closed")
   1122 
   1123         st = os.stat(filename)
   1124         isdir = stat.S_ISDIR(st.st_mode)
   1125         mtime = time.localtime(st.st_mtime)
   1126         date_time = mtime[0:6]
   1127         # Create ZipInfo instance to store file information
   1128         if arcname is None:
   1129             arcname = filename
   1130         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
   1131         while arcname[0] in (os.sep, os.altsep):
   1132             arcname = arcname[1:]
   1133         if isdir:
   1134             arcname += '/'
   1135         zinfo = ZipInfo(arcname, date_time)
   1136         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
   1137         if compress_type is None:
   1138             zinfo.compress_type = self.compression
   1139         else:
   1140             zinfo.compress_type = compress_type
   1141 
   1142         zinfo.file_size = st.st_size
   1143         zinfo.flag_bits = 0x00
   1144         zinfo.header_offset = self.fp.tell()    # Start of header bytes
   1145 
   1146         self._writecheck(zinfo)
   1147         self._didModify = True
   1148 
   1149         if isdir:
   1150             zinfo.file_size = 0
   1151             zinfo.compress_size = 0
   1152             zinfo.CRC = 0
   1153             zinfo.external_attr |= 0x10  # MS-DOS directory flag
   1154             self.filelist.append(zinfo)
   1155             self.NameToInfo[zinfo.filename] = zinfo
   1156             self.fp.write(zinfo.FileHeader(False))
   1157             return
   1158 
   1159         with open(filename, "rb") as fp:
   1160             # Must overwrite CRC and sizes with correct data later
   1161             zinfo.CRC = CRC = 0
   1162             zinfo.compress_size = compress_size = 0
   1163             # Compressed size can be larger than uncompressed size
   1164             zip64 = self._allowZip64 and \
   1165                     zinfo.file_size * 1.05 > ZIP64_LIMIT
   1166             self.fp.write(zinfo.FileHeader(zip64))
   1167             if zinfo.compress_type == ZIP_DEFLATED:
   1168                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1169                      zlib.DEFLATED, -15)
   1170             else:
   1171                 cmpr = None
   1172             file_size = 0
   1173             while 1:
   1174                 buf = fp.read(1024 * 8)
   1175                 if not buf:
   1176                     break
   1177                 file_size = file_size + len(buf)
   1178                 CRC = crc32(buf, CRC) & 0xffffffff
   1179                 if cmpr:
   1180                     buf = cmpr.compress(buf)
   1181                     compress_size = compress_size + len(buf)
   1182                 self.fp.write(buf)
   1183         if cmpr:
   1184             buf = cmpr.flush()
   1185             compress_size = compress_size + len(buf)
   1186             self.fp.write(buf)
   1187             zinfo.compress_size = compress_size
   1188         else:
   1189             zinfo.compress_size = file_size
   1190         zinfo.CRC = CRC
   1191         zinfo.file_size = file_size
   1192         if not zip64 and self._allowZip64:
   1193             if file_size > ZIP64_LIMIT:
   1194                 raise RuntimeError('File size has increased during compressing')
   1195             if compress_size > ZIP64_LIMIT:
   1196                 raise RuntimeError('Compressed size larger than uncompressed size')
   1197         # Seek backwards and write file header (which will now include
   1198         # correct CRC and file sizes)
   1199         position = self.fp.tell()       # Preserve current position in file
   1200         self.fp.seek(zinfo.header_offset, 0)
   1201         self.fp.write(zinfo.FileHeader(zip64))
   1202         self.fp.seek(position, 0)
   1203         self.filelist.append(zinfo)
   1204         self.NameToInfo[zinfo.filename] = zinfo
   1205 
   1206     def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
   1207         """Write a file into the archive.  The contents is the string
   1208         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
   1209         the name of the file in the archive."""
   1210         if not isinstance(zinfo_or_arcname, ZipInfo):
   1211             zinfo = ZipInfo(filename=zinfo_or_arcname,
   1212                             date_time=time.localtime(time.time())[:6])
   1213 
   1214             zinfo.compress_type = self.compression
   1215             if zinfo.filename[-1] == '/':
   1216                 zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
   1217                 zinfo.external_attr |= 0x10           # MS-DOS directory flag
   1218             else:
   1219                 zinfo.external_attr = 0o600 << 16     # ?rw-------
   1220         else:
   1221             zinfo = zinfo_or_arcname
   1222 
   1223         if not self.fp:
   1224             raise RuntimeError(
   1225                   "Attempt to write to ZIP archive that was already closed")
   1226 
   1227         if compress_type is not None:
   1228             zinfo.compress_type = compress_type
   1229 
   1230         zinfo.file_size = len(bytes)            # Uncompressed size
   1231         zinfo.header_offset = self.fp.tell()    # Start of header bytes
   1232         self._writecheck(zinfo)
   1233         self._didModify = True
   1234         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
   1235         if zinfo.compress_type == ZIP_DEFLATED:
   1236             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1237                  zlib.DEFLATED, -15)
   1238             bytes = co.compress(bytes) + co.flush()
   1239             zinfo.compress_size = len(bytes)    # Compressed size
   1240         else:
   1241             zinfo.compress_size = zinfo.file_size
   1242         zip64 = zinfo.file_size > ZIP64_LIMIT or \
   1243                 zinfo.compress_size > ZIP64_LIMIT
   1244         if zip64 and not self._allowZip64:
   1245             raise LargeZipFile("Filesize would require ZIP64 extensions")
   1246         self.fp.write(zinfo.FileHeader(zip64))
   1247         self.fp.write(bytes)
   1248         if zinfo.flag_bits & 0x08:
   1249             # Write CRC and file sizes after the file data
   1250             fmt = '<LQQ' if zip64 else '<LLL'
   1251             self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
   1252                   zinfo.file_size))
   1253         self.fp.flush()
   1254         self.filelist.append(zinfo)
   1255         self.NameToInfo[zinfo.filename] = zinfo
   1256 
   1257     def __del__(self):
   1258         """Call the "close()" method in case the user forgot."""
   1259         self.close()
   1260 
   1261     def close(self):
   1262         """Close the file, and for mode "w" and "a" write the ending
   1263         records."""
   1264         if self.fp is None:
   1265             return
   1266 
   1267         try:
   1268             if self.mode in ("w", "a") and self._didModify: # write ending records
   1269                 pos1 = self.fp.tell()
   1270                 for zinfo in self.filelist:         # write central directory
   1271                     dt = zinfo.date_time
   1272                     dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
   1273                     dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
   1274                     extra = []
   1275                     if zinfo.file_size > ZIP64_LIMIT \
   1276                             or zinfo.compress_size > ZIP64_LIMIT:
   1277                         extra.append(zinfo.file_size)
   1278                         extra.append(zinfo.compress_size)
   1279                         file_size = 0xffffffff
   1280                         compress_size = 0xffffffff
   1281                     else:
   1282                         file_size = zinfo.file_size
   1283                         compress_size = zinfo.compress_size
   1284 
   1285                     if zinfo.header_offset > ZIP64_LIMIT:
   1286                         extra.append(zinfo.header_offset)
   1287                         header_offset = 0xffffffffL
   1288                     else:
   1289                         header_offset = zinfo.header_offset
   1290 
   1291                     extra_data = zinfo.extra
   1292                     if extra:
   1293                         # Append a ZIP64 field to the extra's
   1294                         extra_data = struct.pack(
   1295                                 '<HH' + 'Q'*len(extra),
   1296                                 1, 8*len(extra), *extra) + extra_data
   1297 
   1298                         extract_version = max(45, zinfo.extract_version)
   1299                         create_version = max(45, zinfo.create_version)
   1300                     else:
   1301                         extract_version = zinfo.extract_version
   1302                         create_version = zinfo.create_version
   1303 
   1304                     try:
   1305                         filename, flag_bits = zinfo._encodeFilenameFlags()
   1306                         centdir = struct.pack(structCentralDir,
   1307                         stringCentralDir, create_version,
   1308                         zinfo.create_system, extract_version, zinfo.reserved,
   1309                         flag_bits, zinfo.compress_type, dostime, dosdate,
   1310                         zinfo.CRC, compress_size, file_size,
   1311                         len(filename), len(extra_data), len(zinfo.comment),
   1312                         0, zinfo.internal_attr, zinfo.external_attr,
   1313                         header_offset)
   1314                     except DeprecationWarning:
   1315                         print >>sys.stderr, (structCentralDir,
   1316                         stringCentralDir, create_version,
   1317                         zinfo.create_system, extract_version, zinfo.reserved,
   1318                         zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
   1319                         zinfo.CRC, compress_size, file_size,
   1320                         len(zinfo.filename), len(extra_data), len(zinfo.comment),
   1321                         0, zinfo.internal_attr, zinfo.external_attr,
   1322                         header_offset)
   1323                         raise
   1324                     self.fp.write(centdir)
   1325                     self.fp.write(filename)
   1326                     self.fp.write(extra_data)
   1327                     self.fp.write(zinfo.comment)
   1328 
   1329                 pos2 = self.fp.tell()
   1330                 # Write end-of-zip-archive record
   1331                 centDirCount = len(self.filelist)
   1332                 centDirSize = pos2 - pos1
   1333                 centDirOffset = pos1
   1334                 requires_zip64 = None
   1335                 if centDirCount > ZIP_FILECOUNT_LIMIT:
   1336                     requires_zip64 = "Files count"
   1337                 elif centDirOffset > ZIP64_LIMIT:
   1338                     requires_zip64 = "Central directory offset"
   1339                 elif centDirSize > ZIP64_LIMIT:
   1340                     requires_zip64 = "Central directory size"
   1341                 if requires_zip64:
   1342                     # Need to write the ZIP64 end-of-archive records
   1343                     if not self._allowZip64:
   1344                         raise LargeZipFile(requires_zip64 +
   1345                                            " would require ZIP64 extensions")
   1346                     zip64endrec = struct.pack(
   1347                             structEndArchive64, stringEndArchive64,
   1348                             44, 45, 45, 0, 0, centDirCount, centDirCount,
   1349                             centDirSize, centDirOffset)
   1350                     self.fp.write(zip64endrec)
   1351 
   1352                     zip64locrec = struct.pack(
   1353                             structEndArchive64Locator,
   1354                             stringEndArchive64Locator, 0, pos2, 1)
   1355                     self.fp.write(zip64locrec)
   1356                     centDirCount = min(centDirCount, 0xFFFF)
   1357                     centDirSize = min(centDirSize, 0xFFFFFFFF)
   1358                     centDirOffset = min(centDirOffset, 0xFFFFFFFF)
   1359 
   1360                 endrec = struct.pack(structEndArchive, stringEndArchive,
   1361                                     0, 0, centDirCount, centDirCount,
   1362                                     centDirSize, centDirOffset, len(self._comment))
   1363                 self.fp.write(endrec)
   1364                 self.fp.write(self._comment)
   1365                 self.fp.flush()
   1366         finally:
   1367             fp = self.fp
   1368             self.fp = None
   1369             if not self._filePassed:
   1370                 fp.close()
   1371 
   1372 
   1373 class PyZipFile(ZipFile):
   1374     """Class to create ZIP archives with Python library files and packages."""
   1375 
   1376     def writepy(self, pathname, basename = ""):
   1377         """Add all files from "pathname" to the ZIP archive.
   1378 
   1379         If pathname is a package directory, search the directory and
   1380         all package subdirectories recursively for all *.py and enter
   1381         the modules into the archive.  If pathname is a plain
   1382         directory, listdir *.py and enter all modules.  Else, pathname
   1383         must be a Python *.py file and the module will be put into the
   1384         archive.  Added modules are always module.pyo or module.pyc.
   1385         This method will compile the module.py into module.pyc if
   1386         necessary.
   1387         """
   1388         dir, name = os.path.split(pathname)
   1389         if os.path.isdir(pathname):
   1390             initname = os.path.join(pathname, "__init__.py")
   1391             if os.path.isfile(initname):
   1392                 # This is a package directory, add it
   1393                 if basename:
   1394                     basename = "%s/%s" % (basename, name)
   1395                 else:
   1396                     basename = name
   1397                 if self.debug:
   1398                     print "Adding package in", pathname, "as", basename
   1399                 fname, arcname = self._get_codename(initname[0:-3], basename)
   1400                 if self.debug:
   1401                     print "Adding", arcname
   1402                 self.write(fname, arcname)
   1403                 dirlist = os.listdir(pathname)
   1404                 dirlist.remove("__init__.py")
   1405                 # Add all *.py files and package subdirectories
   1406                 for filename in dirlist:
   1407                     path = os.path.join(pathname, filename)
   1408                     root, ext = os.path.splitext(filename)
   1409                     if os.path.isdir(path):
   1410                         if os.path.isfile(os.path.join(path, "__init__.py")):
   1411                             # This is a package directory, add it
   1412                             self.writepy(path, basename)  # Recursive call
   1413                     elif ext == ".py":
   1414                         fname, arcname = self._get_codename(path[0:-3],
   1415                                          basename)
   1416                         if self.debug:
   1417                             print "Adding", arcname
   1418                         self.write(fname, arcname)
   1419             else:
   1420                 # This is NOT a package directory, add its files at top level
   1421                 if self.debug:
   1422                     print "Adding files from directory", pathname
   1423                 for filename in os.listdir(pathname):
   1424                     path = os.path.join(pathname, filename)
   1425                     root, ext = os.path.splitext(filename)
   1426                     if ext == ".py":
   1427                         fname, arcname = self._get_codename(path[0:-3],
   1428                                          basename)
   1429                         if self.debug:
   1430                             print "Adding", arcname
   1431                         self.write(fname, arcname)
   1432         else:
   1433             if pathname[-3:] != ".py":
   1434                 raise RuntimeError, \
   1435                       'Files added with writepy() must end with ".py"'
   1436             fname, arcname = self._get_codename(pathname[0:-3], basename)
   1437             if self.debug:
   1438                 print "Adding file", arcname
   1439             self.write(fname, arcname)
   1440 
   1441     def _get_codename(self, pathname, basename):
   1442         """Return (filename, archivename) for the path.
   1443 
   1444         Given a module name path, return the correct file path and
   1445         archive name, compiling if necessary.  For example, given
   1446         /python/lib/string, return (/python/lib/string.pyc, string).
   1447         """
   1448         file_py  = pathname + ".py"
   1449         file_pyc = pathname + ".pyc"
   1450         file_pyo = pathname + ".pyo"
   1451         if os.path.isfile(file_pyo) and \
   1452                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
   1453             fname = file_pyo    # Use .pyo file
   1454         elif not os.path.isfile(file_pyc) or \
   1455              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
   1456             import py_compile
   1457             if self.debug:
   1458                 print "Compiling", file_py
   1459             try:
   1460                 py_compile.compile(file_py, file_pyc, None, True)
   1461             except py_compile.PyCompileError,err:
   1462                 print err.msg
   1463             fname = file_pyc
   1464         else:
   1465             fname = file_pyc
   1466         archivename = os.path.split(fname)[1]
   1467         if basename:
   1468             archivename = "%s/%s" % (basename, archivename)
   1469         return (fname, archivename)
   1470 
   1471 
   1472 def main(args = None):
   1473     import textwrap
   1474     USAGE=textwrap.dedent("""\
   1475         Usage:
   1476             zipfile.py -l zipfile.zip        # Show listing of a zipfile

   1477             zipfile.py -t zipfile.zip        # Test if a zipfile is valid

   1478             zipfile.py -e zipfile.zip target # Extract zipfile into target dir

   1479             zipfile.py -c zipfile.zip src ... # Create zipfile from sources

   1480         """)
   1481     if args is None:
   1482         args = sys.argv[1:]
   1483 
   1484     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
   1485         print USAGE
   1486         sys.exit(1)
   1487 
   1488     if args[0] == '-l':
   1489         if len(args) != 2:
   1490             print USAGE
   1491             sys.exit(1)
   1492         with ZipFile(args[1], 'r') as zf:
   1493             zf.printdir()
   1494 
   1495     elif args[0] == '-t':
   1496         if len(args) != 2:
   1497             print USAGE
   1498             sys.exit(1)
   1499         with ZipFile(args[1], 'r') as zf:
   1500             badfile = zf.testzip()
   1501         if badfile:
   1502             print("The following enclosed file is corrupted: {!r}".format(badfile))
   1503         print "Done testing"
   1504 
   1505     elif args[0] == '-e':
   1506         if len(args) != 3:
   1507             print USAGE
   1508             sys.exit(1)
   1509 
   1510         with ZipFile(args[1], 'r') as zf:
   1511             zf.extractall(args[2])
   1512 
   1513     elif args[0] == '-c':
   1514         if len(args) < 3:
   1515             print USAGE
   1516             sys.exit(1)
   1517 
   1518         def addToZip(zf, path, zippath):
   1519             if os.path.isfile(path):
   1520                 zf.write(path, zippath, ZIP_DEFLATED)
   1521             elif os.path.isdir(path):
   1522                 if zippath:
   1523                     zf.write(path, zippath)
   1524                 for nm in os.listdir(path):
   1525                     addToZip(zf,
   1526                             os.path.join(path, nm), os.path.join(zippath, nm))
   1527             # else: ignore
   1528 
   1529         with ZipFile(args[1], 'w', allowZip64=True) as zf:
   1530             for path in args[2:]:
   1531                 zippath = os.path.basename(path)
   1532                 if not zippath:
   1533                     zippath = os.path.basename(os.path.dirname(path))
   1534                 if zippath in ('', os.curdir, os.pardir):
   1535                     zippath = ''
   1536                 addToZip(zf, path, zippath)
   1537 
   1538 if __name__ == "__main__":
   1539     main()
   1540