Home | History | Annotate | Download | only in Lib
      1 """
      2 Read and write ZIP files.
      3 """
      4 import struct, os, time, sys, shutil
      5 import binascii, cStringIO, stat
      6 import io
      7 import re
      8 
# Optional dependency: zlib provides DEFLATE support and a CRC-32 routine.
try:
    import zlib # We may need its compression method
    # Use zlib's CRC-32 implementation whenever zlib can be imported.
    crc32 = zlib.crc32
except ImportError:
    # zlib is unavailable: leave a None marker (ZipFile() checks it before
    # accepting ZIP_DEFLATED) and take CRC-32 from binascii instead.
    zlib = None
    crc32 = binascii.crc32
     15 
# Names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
     18 
class BadZipfile(Exception):
    """Raised when a file cannot be processed as a valid ZIP archive.

    Within this module it is raised, for example, when no end-of-central-
    directory record is found, when the archive spans multiple disks, or
    when a member's CRC-32 does not match the recorded value.
    """
    pass
     21 
     22 
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """
     28 
# Alias listed in __all__; it is the very same class object as BadZipfile.
error = BadZipfile      # The exception raised by this module

     30 
# Largest size ZipInfo.FileHeader() writes into the classic 32-bit header
# fields; bigger values are moved into a ZIP64 extra record instead.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry-count and archive-comment-length limits
# of the classic format; their uses are outside this view -- confirm.
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# All formats start with "<": little-endian byte order with standard sizes
# (B = 1 byte, H = 2 bytes, L = 4 bytes, Q = 8 bytes, 4s = 4-byte string).

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout: signature, four 16-bit fields, two 32-bit fields, one 16-bit field.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields in the end-of-central-directory list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment text and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout: signature, four 1-byte fields, four 16-bit fields, three 32-bit
# fields, five 16-bit fields, two 32-bit fields (19 values in total).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout: signature, two 1-byte fields, four 16-bit fields, three 32-bit
# fields, two 16-bit fields (12 values in total).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked fields in a local file header.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: signature, one 32-bit field, one 64-bit field, one 32-bit field.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout: signature, one 64-bit field, two 16-bit fields, two 32-bit fields,
# four 64-bit fields (10 values in total).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked fields in the ZIP64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
    132 
    133 def _check_zipfile(fp):
    134     try:
    135         if _EndRecData(fp):
    136             return True         # file has correct magic number

    137     except IOError:
    138         pass
    139     return False
    140 
    141 def is_zipfile(filename):
    142     """Quickly see if a file is a ZIP file by checking the magic number.
    143 
    144     The filename argument may be a file or file-like object too.
    145     """
    146     result = False
    147     try:
    148         if hasattr(filename, "read"):
    149             result = _check_zipfile(fp=filename)
    150         else:
    151             with open(filename, "rb") as fp:
    152                 result = _check_zipfile(fp)
    153     except IOError:
    154         pass
    155     return result
    156 
    157 def _EndRecData64(fpin, offset, endrec):
    158     """
    159     Read the ZIP64 end-of-archive records and use that to update endrec
    160     """
    161     try:
    162         fpin.seek(offset - sizeEndCentDir64Locator, 2)
    163     except IOError:
    164         # If the seek fails, the file is not large enough to contain a ZIP64

    165         # end-of-archive record, so just return the end record we were given.

    166         return endrec
    167 
    168     data = fpin.read(sizeEndCentDir64Locator)
    169     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    170     if sig != stringEndArchive64Locator:
    171         return endrec
    172 
    173     if diskno != 0 or disks != 1:
    174         raise BadZipfile("zipfiles that span multiple disks are not supported")
    175 
    176     # Assume no 'zip64 extensible data'

    177     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    178     data = fpin.read(sizeEndCentDir64)
    179     sig, sz, create_version, read_version, disk_num, disk_dir, \
    180             dircount, dircount2, dirsize, diroffset = \
    181             struct.unpack(structEndArchive64, data)
    182     if sig != stringEndArchive64:
    183         return endrec
    184 
    185     # Update the original endrec using data from the ZIP64 record

    186     endrec[_ECD_SIGNATURE] = sig
    187     endrec[_ECD_DISK_NUMBER] = disk_num
    188     endrec[_ECD_DISK_START] = disk_dir
    189     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    190     endrec[_ECD_ENTRIES_TOTAL] = dircount2
    191     endrec[_ECD_SIZE] = dirsize
    192     endrec[_ECD_OFFSET] = diroffset
    193     return endrec
    194 
    195 
    196 def _EndRecData(fpin):
    197     """Return data from the "End of Central Directory" record, or None.
    198 
    199     The data is a list of the nine items in the ZIP "End of central dir"
    200     record followed by a tenth item, the file seek offset of this record."""
    201 
    202     # Determine file size

    203     fpin.seek(0, 2)
    204     filesize = fpin.tell()
    205 
    206     # Check to see if this is ZIP file with no archive comment (the

    207     # "end of central directory" structure should be the last item in the

    208     # file if this is the case).

    209     try:
    210         fpin.seek(-sizeEndCentDir, 2)
    211     except IOError:
    212         return None
    213     data = fpin.read()
    214     if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
    215         # the signature is correct and there's no comment, unpack structure

    216         endrec = struct.unpack(structEndArchive, data)
    217         endrec=list(endrec)
    218 
    219         # Append a blank comment and record start offset

    220         endrec.append("")
    221         endrec.append(filesize - sizeEndCentDir)
    222 
    223         # Try to read the "Zip64 end of central directory" structure

    224         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
    225 
    226     # Either this is not a ZIP file, or it is a ZIP file with an archive

    227     # comment.  Search the end of the file for the "end of central directory"

    228     # record signature. The comment is the last item in the ZIP file and may be

    229     # up to 64K long.  It is assumed that the "end of central directory" magic

    230     # number does not appear in the comment.

    231     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    232     fpin.seek(maxCommentStart, 0)
    233     data = fpin.read()
    234     start = data.rfind(stringEndArchive)
    235     if start >= 0:
    236         # found the magic number; attempt to unpack and interpret

    237         recData = data[start:start+sizeEndCentDir]
    238         endrec = list(struct.unpack(structEndArchive, recData))
    239         comment = data[start+sizeEndCentDir:]
    240         # check that comment length is correct

    241         if endrec[_ECD_COMMENT_SIZE] == len(comment):
    242             # Append the archive comment and start offset

    243             endrec.append(comment)
    244             endrec.append(maxCommentStart + start)
    245 
    246             # Try to read the "Zip64 end of central directory" structure

    247             return _EndRecData64(fpin, maxCommentStart + start - filesize,
    248                                  endrec)
    249 
    250     # Unable to find a valid end of central directory structure

    251     return
    252 
    253 
    254 class ZipInfo (object):
    255     """Class with attributes describing each file in the ZIP archive."""
    256 
    257     __slots__ = (
    258             'orig_filename',
    259             'filename',
    260             'date_time',
    261             'compress_type',
    262             'comment',
    263             'extra',
    264             'create_system',
    265             'create_version',
    266             'extract_version',
    267             'reserved',
    268             'flag_bits',
    269             'volume',
    270             'internal_attr',
    271             'external_attr',
    272             'header_offset',
    273             'CRC',
    274             'compress_size',
    275             'file_size',
    276             '_raw_time',
    277         )
    278 
    279     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    280         self.orig_filename = filename   # Original file name in archive

    281 
    282         # Terminate the file name at the first null byte.  Null bytes in file

    283         # names are used as tricks by viruses in archives.

    284         null_byte = filename.find(chr(0))
    285         if null_byte >= 0:
    286             filename = filename[0:null_byte]
    287         # This is used to ensure paths in generated ZIP files always use

    288         # forward slashes as the directory separator, as required by the

    289         # ZIP format specification.

    290         if os.sep != "/" and os.sep in filename:
    291             filename = filename.replace(os.sep, "/")
    292 
    293         self.filename = filename        # Normalized file name

    294         self.date_time = date_time      # year, month, day, hour, min, sec

    295         # Standard values:

    296         self.compress_type = ZIP_STORED # Type of compression for the file

    297         self.comment = ""               # Comment for each file

    298         self.extra = ""                 # ZIP extra data

    299         if sys.platform == 'win32':
    300             self.create_system = 0          # System which created ZIP archive

    301         else:
    302             # Assume everything else is unix-y

    303             self.create_system = 3          # System which created ZIP archive

    304         self.create_version = 20        # Version which created ZIP archive

    305         self.extract_version = 20       # Version needed to extract archive

    306         self.reserved = 0               # Must be zero

    307         self.flag_bits = 0              # ZIP flag bits

    308         self.volume = 0                 # Volume number of file header

    309         self.internal_attr = 0          # Internal attributes

    310         self.external_attr = 0          # External file attributes

    311         # Other attributes are set by class ZipFile:

    312         # header_offset         Byte offset to the file header

    313         # CRC                   CRC-32 of the uncompressed file

    314         # compress_size         Size of the compressed file

    315         # file_size             Size of the uncompressed file

    316 
    317     def FileHeader(self):
    318         """Return the per-file header as a string."""
    319         dt = self.date_time
    320         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    321         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    322         if self.flag_bits & 0x08:
    323             # Set these to zero because we write them after the file data

    324             CRC = compress_size = file_size = 0
    325         else:
    326             CRC = self.CRC
    327             compress_size = self.compress_size
    328             file_size = self.file_size
    329 
    330         extra = self.extra
    331 
    332         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
    333             # File is larger than what fits into a 4 byte integer,

    334             # fall back to the ZIP64 extension

    335             fmt = '<HHQQ'
    336             extra = extra + struct.pack(fmt,
    337                     1, struct.calcsize(fmt)-4, file_size, compress_size)
    338             file_size = 0xffffffff
    339             compress_size = 0xffffffff
    340             self.extract_version = max(45, self.extract_version)
    341             self.create_version = max(45, self.extract_version)
    342 
    343         filename, flag_bits = self._encodeFilenameFlags()
    344         header = struct.pack(structFileHeader, stringFileHeader,
    345                  self.extract_version, self.reserved, flag_bits,
    346                  self.compress_type, dostime, dosdate, CRC,
    347                  compress_size, file_size,
    348                  len(filename), len(extra))
    349         return header + filename + extra
    350 
    351     def _encodeFilenameFlags(self):
    352         if isinstance(self.filename, unicode):
    353             try:
    354                 return self.filename.encode('ascii'), self.flag_bits
    355             except UnicodeEncodeError:
    356                 return self.filename.encode('utf-8'), self.flag_bits | 0x800
    357         else:
    358             return self.filename, self.flag_bits
    359 
    360     def _decodeFilename(self):
    361         if self.flag_bits & 0x800:
    362             return self.filename.decode('utf-8')
    363         else:
    364             return self.filename
    365 
    366     def _decodeExtra(self):
    367         # Try to decode the extra field.

    368         extra = self.extra
    369         unpack = struct.unpack
    370         while extra:
    371             tp, ln = unpack('<HH', extra[:4])
    372             if tp == 1:
    373                 if ln >= 24:
    374                     counts = unpack('<QQQ', extra[4:28])
    375                 elif ln == 16:
    376                     counts = unpack('<QQ', extra[4:20])
    377                 elif ln == 8:
    378                     counts = unpack('<Q', extra[4:12])
    379                 elif ln == 0:
    380                     counts = ()
    381                 else:
    382                     raise RuntimeError, "Corrupt extra field %s"%(ln,)
    383 
    384                 idx = 0
    385 
    386                 # ZIP64 extension (large files and/or large archives)

    387                 if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
    388                     self.file_size = counts[idx]
    389                     idx += 1
    390 
    391                 if self.compress_size == 0xFFFFFFFFL:
    392                     self.compress_size = counts[idx]
    393                     idx += 1
    394 
    395                 if self.header_offset == 0xffffffffL:
    396                     old = self.header_offset
    397                     self.header_offset = counts[idx]
    398                     idx+=1
    399 
    400             extra = extra[ln+4:]
    401 
    402 
    403 class _ZipDecrypter:
    404     """Class to handle decryption of files stored within a ZIP archive.
    405 
    406     ZIP supports a password-based form of encryption. Even though known
    407     plaintext attacks have been found against it, it is still useful
    408     to be able to get data out of such a file.
    409 
    410     Usage:
    411         zd = _ZipDecrypter(mypwd)
    412         plain_char = zd(cypher_char)
    413         plain_text = map(zd, cypher_text)
    414     """
    415 
    416     def _GenerateCRCTable():
    417         """Generate a CRC-32 table.
    418 
    419         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
    420         internal keys. We noticed that a direct implementation is faster than
    421         relying on binascii.crc32().
    422         """
    423         poly = 0xedb88320
    424         table = [0] * 256
    425         for i in range(256):
    426             crc = i
    427             for j in range(8):
    428                 if crc & 1:
    429                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
    430                 else:
    431                     crc = ((crc >> 1) & 0x7FFFFFFF)
    432             table[i] = crc
    433         return table
    434     crctable = _GenerateCRCTable()
    435 
    436     def _crc32(self, ch, crc):
    437         """Compute the CRC32 primitive on one byte."""
    438         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
    439 
    440     def __init__(self, pwd):
    441         self.key0 = 305419896
    442         self.key1 = 591751049
    443         self.key2 = 878082192
    444         for p in pwd:
    445             self._UpdateKeys(p)
    446 
    447     def _UpdateKeys(self, c):
    448         self.key0 = self._crc32(c, self.key0)
    449         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
    450         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
    451         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
    452 
    453     def __call__(self, c):
    454         """Decrypt a single character."""
    455         c = ord(c)
    456         k = self.key2 | 2
    457         c = c ^ (((k * (k^1)) >> 8) & 255)
    458         c = chr(c)
    459         self._UpdateKeys(c)
    460         return c
    461 
    462 class ZipExtFile(io.BufferedIOBase):
    463     """File-like object for reading an archive member.
    464        Is returned by ZipFile.open().
    465     """
    466 
    467     # Max size supported by decompressor.

    468     MAX_N = 1 << 31 - 1
    469 
    470     # Read from compressed files in 4k blocks.

    471     MIN_READ_SIZE = 4096
    472 
    473     # Search for universal newlines or line chunks.

    474     PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
    475 
    476     def __init__(self, fileobj, mode, zipinfo, decrypter=None):
    477         self._fileobj = fileobj
    478         self._decrypter = decrypter
    479 
    480         self._compress_type = zipinfo.compress_type
    481         self._compress_size = zipinfo.compress_size
    482         self._compress_left = zipinfo.compress_size
    483 
    484         if self._compress_type == ZIP_DEFLATED:
    485             self._decompressor = zlib.decompressobj(-15)
    486         self._unconsumed = ''
    487 
    488         self._readbuffer = ''
    489         self._offset = 0
    490 
    491         self._universal = 'U' in mode
    492         self.newlines = None
    493 
    494         # Adjust read size for encrypted files since the first 12 bytes

    495         # are for the encryption/password information.

    496         if self._decrypter is not None:
    497             self._compress_left -= 12
    498 
    499         self.mode = mode
    500         self.name = zipinfo.filename
    501 
    502         if hasattr(zipinfo, 'CRC'):
    503             self._expected_crc = zipinfo.CRC
    504             self._running_crc = crc32(b'') & 0xffffffff
    505         else:
    506             self._expected_crc = None
    507 
    508     def readline(self, limit=-1):
    509         """Read and return a line from the stream.
    510 
    511         If limit is specified, at most limit bytes will be read.
    512         """
    513 
    514         if not self._universal and limit < 0:
    515             # Shortcut common case - newline found in buffer.

    516             i = self._readbuffer.find('\n', self._offset) + 1
    517             if i > 0:
    518                 line = self._readbuffer[self._offset: i]
    519                 self._offset = i
    520                 return line
    521 
    522         if not self._universal:
    523             return io.BufferedIOBase.readline(self, limit)
    524 
    525         line = ''
    526         while limit < 0 or len(line) < limit:
    527             readahead = self.peek(2)
    528             if readahead == '':
    529                 return line
    530 
    531             #

    532             # Search for universal newlines or line chunks.

    533             #

    534             # The pattern returns either a line chunk or a newline, but not

    535             # both. Combined with peek(2), we are assured that the sequence

    536             # '\r\n' is always retrieved completely and never split into

    537             # separate newlines - '\r', '\n' due to coincidental readaheads.

    538             #

    539             match = self.PATTERN.search(readahead)
    540             newline = match.group('newline')
    541             if newline is not None:
    542                 if self.newlines is None:
    543                     self.newlines = []
    544                 if newline not in self.newlines:
    545                     self.newlines.append(newline)
    546                 self._offset += len(newline)
    547                 return line + '\n'
    548 
    549             chunk = match.group('chunk')
    550             if limit >= 0:
    551                 chunk = chunk[: limit - len(line)]
    552 
    553             self._offset += len(chunk)
    554             line += chunk
    555 
    556         return line
    557 
    558     def peek(self, n=1):
    559         """Returns buffered bytes without advancing the position."""
    560         if n > len(self._readbuffer) - self._offset:
    561             chunk = self.read(n)
    562             self._offset -= len(chunk)
    563 
    564         # Return up to 512 bytes to reduce allocation overhead for tight loops.

    565         return self._readbuffer[self._offset: self._offset + 512]
    566 
    567     def readable(self):
    568         return True
    569 
    570     def read(self, n=-1):
    571         """Read and return up to n bytes.
    572         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
    573         """
    574         buf = ''
    575         if n is None:
    576             n = -1
    577         while True:
    578             if n < 0:
    579                 data = self.read1(n)
    580             elif n > len(buf):
    581                 data = self.read1(n - len(buf))
    582             else:
    583                 return buf
    584             if len(data) == 0:
    585                 return buf
    586             buf += data
    587 
    588     def _update_crc(self, newdata, eof):
    589         # Update the CRC using the given data.

    590         if self._expected_crc is None:
    591             # No need to compute the CRC if we don't have a reference value

    592             return
    593         self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
    594         # Check the CRC if we're at the end of the file

    595         if eof and self._running_crc != self._expected_crc:
    596             raise BadZipfile("Bad CRC-32 for file %r" % self.name)
    597 
    598     def read1(self, n):
    599         """Read up to n bytes with at most one read() system call."""
    600 
    601         # Simplify algorithm (branching) by transforming negative n to large n.

    602         if n < 0 or n is None:
    603             n = self.MAX_N
    604 
    605         # Bytes available in read buffer.

    606         len_readbuffer = len(self._readbuffer) - self._offset
    607 
    608         # Read from file.

    609         if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
    610             nbytes = n - len_readbuffer - len(self._unconsumed)
    611             nbytes = max(nbytes, self.MIN_READ_SIZE)
    612             nbytes = min(nbytes, self._compress_left)
    613 
    614             data = self._fileobj.read(nbytes)
    615             self._compress_left -= len(data)
    616 
    617             if data and self._decrypter is not None:
    618                 data = ''.join(map(self._decrypter, data))
    619 
    620             if self._compress_type == ZIP_STORED:
    621                 self._update_crc(data, eof=(self._compress_left==0))
    622                 self._readbuffer = self._readbuffer[self._offset:] + data
    623                 self._offset = 0
    624             else:
    625                 # Prepare deflated bytes for decompression.

    626                 self._unconsumed += data
    627 
    628         # Handle unconsumed data.

    629         if (len(self._unconsumed) > 0 and n > len_readbuffer and
    630             self._compress_type == ZIP_DEFLATED):
    631             data = self._decompressor.decompress(
    632                 self._unconsumed,
    633                 max(n - len_readbuffer, self.MIN_READ_SIZE)
    634             )
    635 
    636             self._unconsumed = self._decompressor.unconsumed_tail
    637             eof = len(self._unconsumed) == 0 and self._compress_left == 0
    638             if eof:
    639                 data += self._decompressor.flush()
    640 
    641             self._update_crc(data, eof=eof)
    642             self._readbuffer = self._readbuffer[self._offset:] + data
    643             self._offset = 0
    644 
    645         # Read from buffer.

    646         data = self._readbuffer[self._offset: self._offset + n]
    647         self._offset += len(data)
    648         return data
    649 
    650 
    651 
    652 class ZipFile:
    653     """ Class with methods to open, read, write, close, list zip files.
    654 
    655     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
    656 
    657     file: Either the path to the file, or a file-like object.
    658           If it is a path, the file will be opened and closed by ZipFile.
    659     mode: The mode can be either read "r", write "w" or append "a".
    660     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    661     allowZip64: if True ZipFile will create files with ZIP64 extensions when
    662                 needed, otherwise it will raise an exception when this would
    663                 be necessary.
    664 
    665     """
    666 
    667     fp = None                   # Set here since __del__ checks it

    668 
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        file        -- path string, or an already open file-like object.
        mode        -- "r", "w" or "a".
        compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib).
        allowZip64  -- stored for later use when writing large archives.

        Raises RuntimeError for an invalid mode or an unsupported/unavailable
        compression method.  In "r" mode BadZipfile propagates when the file
        is not a ZIP archive.  In "a" mode a path that cannot be opened for
        update is created instead, and a non-ZIP file is simply appended to.
        If reading the directory fails, a file opened here is closed again.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self.comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # The file cannot be opened for update (e.g. it does not
                    # exist yet): fall back to creating it.
                    mode = key = 'w'
                    self.fp = open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        # "key" is always one of 'r', 'w' or 'a' here because mode was
        # validated above, so no further fallback branch is needed.
        try:
            if key == 'r':
                self._GetContents()
            elif key == 'w':
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
        except BaseException:
            # Do not leak a file this constructor opened when set-up fails;
            # _GetContents() may already have closed it and cleared self.fp.
            if not self._filePassed and self.fp is not None:
                self.fp.close()
                self.fp = None
            raise
    735 
    def __enter__(self):
        """Enter a "with" block; the ZipFile itself is the context value."""
        return self
    738 
    def __exit__(self, type, value, traceback):
        """Leave a "with" block by calling close().

        The exception details are ignored and nothing is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()
    741 
    742     def _GetContents(self):
    743         """Read the directory, making sure we close the file if the format
    744         is bad."""
    745         try:
    746             self._RealGetContents()
    747         except BadZipfile:
    748             if not self._filePassed:
    749                 self.fp.close()
    750                 self.fp = None
    751             raise
    752 
    753     def _RealGetContents(self):
    754         """Read in the table of contents for the ZIP file."""
    755         fp = self.fp
    756         try:
    757             endrec = _EndRecData(fp)
    758         except IOError:
    759             raise BadZipfile("File is not a zip file")
    760         if not endrec:
    761             raise BadZipfile, "File is not a zip file"
    762         if self.debug > 1:
    763             print endrec
    764         size_cd = endrec[_ECD_SIZE]             # bytes in central directory

    765         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory

    766         self.comment = endrec[_ECD_COMMENT]     # archive comment

    767 
    768         # "concat" is zero, unless zip was concatenated to another file

    769         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    770         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
    771             # If Zip64 extension structures are present, account for them

    772             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    773 
    774         if self.debug > 2:
    775             inferred = concat + offset_cd
    776             print "given, inferred, offset", offset_cd, inferred, concat
    777         # self.start_dir:  Position of start of central directory

    778         self.start_dir = offset_cd + concat
    779         fp.seek(self.start_dir, 0)
    780         data = fp.read(size_cd)
    781         fp = cStringIO.StringIO(data)
    782         total = 0
    783         while total < size_cd:
    784             centdir = fp.read(sizeCentralDir)
    785             if centdir[0:4] != stringCentralDir:
    786                 raise BadZipfile, "Bad magic number for central directory"
    787             centdir = struct.unpack(structCentralDir, centdir)
    788             if self.debug > 2:
    789                 print centdir
    790             filename = fp.read(centdir[_CD_FILENAME_LENGTH])
    791             # Create ZipInfo instance to store file information

    792             x = ZipInfo(filename)
    793             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
    794             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
    795             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
    796             (x.create_version, x.create_system, x.extract_version, x.reserved,
    797                 x.flag_bits, x.compress_type, t, d,
    798                 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
    799             x.volume, x.internal_attr, x.external_attr = centdir[15:18]
    800             # Convert date/time code to (year, month, day, hour, min, sec)

    801             x._raw_time = t
    802             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
    803                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
    804 
    805             x._decodeExtra()
    806             x.header_offset = x.header_offset + concat
    807             x.filename = x._decodeFilename()
    808             self.filelist.append(x)
    809             self.NameToInfo[x.filename] = x
    810 
    811             # update total bytes read from central directory

    812             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
    813                      + centdir[_CD_EXTRA_FIELD_LENGTH]
    814                      + centdir[_CD_COMMENT_LENGTH])
    815 
    816             if self.debug > 2:
    817                 print "total", total
    818 
    819 
    820     def namelist(self):
    821         """Return a list of file names in the archive."""
    822         l = []
    823         for data in self.filelist:
    824             l.append(data.filename)
    825         return l
    826 
    827     def infolist(self):
    828         """Return a list of class ZipInfo instances for files in the
    829         archive."""
    830         return self.filelist
    831 
    832     def printdir(self):
    833         """Print a table of contents for the zip file."""
    834         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
    835         for zinfo in self.filelist:
    836             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
    837             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
    838 
    839     def testzip(self):
    840         """Read all the files and check the CRC."""
    841         chunk_size = 2 ** 20
    842         for zinfo in self.filelist:
    843             try:
    844                 # Read by chunks, to avoid an OverflowError or a

    845                 # MemoryError with very large embedded files.

    846                 f = self.open(zinfo.filename, "r")
    847                 while f.read(chunk_size):     # Check CRC-32

    848                     pass
    849             except BadZipfile:
    850                 return zinfo.filename
    851 
    852     def getinfo(self, name):
    853         """Return the instance of ZipInfo given 'name'."""
    854         info = self.NameToInfo.get(name)
    855         if info is None:
    856             raise KeyError(
    857                 'There is no item named %r in the archive' % name)
    858 
    859         return info
    860 
    861     def setpassword(self, pwd):
    862         """Set default password for encrypted files."""
    863         self.pwd = pwd
    864 
    865     def read(self, name, pwd=None):
    866         """Return file bytes (as a string) for name."""
    867         return self.open(name, "r", pwd).read()
    868 
    869     def open(self, name, mode="r", pwd=None):
    870         """Return file-like object for 'name'."""
    871         if mode not in ("r", "U", "rU"):
    872             raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
    873         if not self.fp:
    874             raise RuntimeError, \
    875                   "Attempt to read ZIP archive that was already closed"
    876 
    877         # Only open a new file for instances where we were not

    878         # given a file object in the constructor

    879         if self._filePassed:
    880             zef_file = self.fp
    881         else:
    882             zef_file = open(self.filename, 'rb')
    883 
    884         # Make sure we have an info object

    885         if isinstance(name, ZipInfo):
    886             # 'name' is already an info object

    887             zinfo = name
    888         else:
    889             # Get info object for name

    890             zinfo = self.getinfo(name)
    891 
    892         zef_file.seek(zinfo.header_offset, 0)
    893 
    894         # Skip the file header:

    895         fheader = zef_file.read(sizeFileHeader)
    896         if fheader[0:4] != stringFileHeader:
    897             raise BadZipfile, "Bad magic number for file header"
    898 
    899         fheader = struct.unpack(structFileHeader, fheader)
    900         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
    901         if fheader[_FH_EXTRA_FIELD_LENGTH]:
    902             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
    903 
    904         if fname != zinfo.orig_filename:
    905             raise BadZipfile, \
    906                       'File name in directory "%s" and header "%s" differ.' % (
    907                           zinfo.orig_filename, fname)
    908 
    909         # check for encrypted flag & handle password

    910         is_encrypted = zinfo.flag_bits & 0x1
    911         zd = None
    912         if is_encrypted:
    913             if not pwd:
    914                 pwd = self.pwd
    915             if not pwd:
    916                 raise RuntimeError, "File %s is encrypted, " \
    917                       "password required for extraction" % name
    918 
    919             zd = _ZipDecrypter(pwd)
    920             # The first 12 bytes in the cypher stream is an encryption header

    921             #  used to strengthen the algorithm. The first 11 bytes are

    922             #  completely random, while the 12th contains the MSB of the CRC,

    923             #  or the MSB of the file time depending on the header type

    924             #  and is used to check the correctness of the password.

    925             bytes = zef_file.read(12)
    926             h = map(zd, bytes[0:12])
    927             if zinfo.flag_bits & 0x8:
    928                 # compare against the file type from extended local headers

    929                 check_byte = (zinfo._raw_time >> 8) & 0xff
    930             else:
    931                 # compare against the CRC otherwise

    932                 check_byte = (zinfo.CRC >> 24) & 0xff
    933             if ord(h[11]) != check_byte:
    934                 raise RuntimeError("Bad password for file", name)
    935 
    936         return  ZipExtFile(zef_file, mode, zinfo, zd)
    937 
    def extract(self, member, path=None, pwd=None):
        """Extract one member of the archive and return the path it was
           written to.

           `member' may be a member name or a ZipInfo object.  The member
           is extracted under `path', which defaults to the current working
           directory, using its full name.  `pwd' is forwarded to the
           extraction step.
        """
        zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
        destination = os.getcwd() if path is None else path
        return self._extract_member(zinfo, destination, pwd)
    951 
    952     def extractall(self, path=None, members=None, pwd=None):
    953         """Extract all members from the archive to the current working
    954            directory. `path' specifies a different directory to extract to.
    955            `members' is optional and must be a subset of the list returned
    956            by namelist().
    957         """
    958         if members is None:
    959             members = self.namelist()
    960 
    961         for zipinfo in members:
    962             self.extract(zipinfo, path, pwd)
    963 
    964     def _extract_member(self, member, targetpath, pwd):
    965         """Extract the ZipInfo object 'member' to a physical
    966            file on the path targetpath.
    967         """
    968         # build the destination pathname, replacing
    969         # forward slashes to platform specific separators.
    970         # Strip trailing path separator, unless it represents the root.
    971         if (targetpath[-1:] in (os.path.sep, os.path.altsep)
    972             and len(os.path.splitdrive(targetpath)[1]) > 1):
    973             targetpath = targetpath[:-1]
    974 
    975         # don't include leading "/" from file name if present
    976         if member.filename[0] == '/':
    977             targetpath = os.path.join(targetpath, member.filename[1:])
    978         else:
    979             targetpath = os.path.join(targetpath, member.filename)
    980 
    981         targetpath = os.path.normpath(targetpath)
    982 
    983         # Create all upper directories if necessary.

    984         upperdirs = os.path.dirname(targetpath)
    985         if upperdirs and not os.path.exists(upperdirs):
    986             os.makedirs(upperdirs)
    987 
    988         if member.filename[-1] == '/':
    989             if not os.path.isdir(targetpath):
    990                 os.mkdir(targetpath)
    991             return targetpath
    992 
    993         source = self.open(member, pwd=pwd)
    994         target = file(targetpath, "wb")
    995         shutil.copyfileobj(source, target)
    996         source.close()
    997         target.close()
    998 
    999         return targetpath
   1000 
   1001     def _writecheck(self, zinfo):
   1002         """Check for errors before writing a file to the archive."""
   1003         if zinfo.filename in self.NameToInfo:
   1004             if self.debug:      # Warning for duplicate names

   1005                 print "Duplicate name:", zinfo.filename
   1006         if self.mode not in ("w", "a"):
   1007             raise RuntimeError, 'write() requires mode "w" or "a"'
   1008         if not self.fp:
   1009             raise RuntimeError, \
   1010                   "Attempt to write ZIP archive that was already closed"
   1011         if zinfo.compress_type == ZIP_DEFLATED and not zlib:
   1012             raise RuntimeError, \
   1013                   "Compression requires the (missing) zlib module"
   1014         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
   1015             raise RuntimeError, \
   1016                   "That compression method is not supported"
   1017         if zinfo.file_size > ZIP64_LIMIT:
   1018             if not self._allowZip64:
   1019                 raise LargeZipFile("Filesize would require ZIP64 extensions")
   1020         if zinfo.header_offset > ZIP64_LIMIT:
   1021             if not self._allowZip64:
   1022                 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
   1023 
   1024     def write(self, filename, arcname=None, compress_type=None):
   1025         """Put the bytes from filename into the archive under the name
   1026         arcname."""
   1027         if not self.fp:
   1028             raise RuntimeError(
   1029                   "Attempt to write to ZIP archive that was already closed")
   1030 
   1031         st = os.stat(filename)
   1032         isdir = stat.S_ISDIR(st.st_mode)
   1033         mtime = time.localtime(st.st_mtime)
   1034         date_time = mtime[0:6]
   1035         # Create ZipInfo instance to store file information

   1036         if arcname is None:
   1037             arcname = filename
   1038         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
   1039         while arcname[0] in (os.sep, os.altsep):
   1040             arcname = arcname[1:]
   1041         if isdir:
   1042             arcname += '/'
   1043         zinfo = ZipInfo(arcname, date_time)
   1044         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes

   1045         if compress_type is None:
   1046             zinfo.compress_type = self.compression
   1047         else:
   1048             zinfo.compress_type = compress_type
   1049 
   1050         zinfo.file_size = st.st_size
   1051         zinfo.flag_bits = 0x00
   1052         zinfo.header_offset = self.fp.tell()    # Start of header bytes

   1053 
   1054         self._writecheck(zinfo)
   1055         self._didModify = True
   1056 
   1057         if isdir:
   1058             zinfo.file_size = 0
   1059             zinfo.compress_size = 0
   1060             zinfo.CRC = 0
   1061             self.filelist.append(zinfo)
   1062             self.NameToInfo[zinfo.filename] = zinfo
   1063             self.fp.write(zinfo.FileHeader())
   1064             return
   1065 
   1066         with open(filename, "rb") as fp:
   1067             # Must overwrite CRC and sizes with correct data later

   1068             zinfo.CRC = CRC = 0
   1069             zinfo.compress_size = compress_size = 0
   1070             zinfo.file_size = file_size = 0
   1071             self.fp.write(zinfo.FileHeader())
   1072             if zinfo.compress_type == ZIP_DEFLATED:
   1073                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1074                      zlib.DEFLATED, -15)
   1075             else:
   1076                 cmpr = None
   1077             while 1:
   1078                 buf = fp.read(1024 * 8)
   1079                 if not buf:
   1080                     break
   1081                 file_size = file_size + len(buf)
   1082                 CRC = crc32(buf, CRC) & 0xffffffff
   1083                 if cmpr:
   1084                     buf = cmpr.compress(buf)
   1085                     compress_size = compress_size + len(buf)
   1086                 self.fp.write(buf)
   1087         if cmpr:
   1088             buf = cmpr.flush()
   1089             compress_size = compress_size + len(buf)
   1090             self.fp.write(buf)
   1091             zinfo.compress_size = compress_size
   1092         else:
   1093             zinfo.compress_size = file_size
   1094         zinfo.CRC = CRC
   1095         zinfo.file_size = file_size
   1096         # Seek backwards and write CRC and file sizes

   1097         position = self.fp.tell()       # Preserve current position in file

   1098         self.fp.seek(zinfo.header_offset + 14, 0)
   1099         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
   1100               zinfo.file_size))
   1101         self.fp.seek(position, 0)
   1102         self.filelist.append(zinfo)
   1103         self.NameToInfo[zinfo.filename] = zinfo
   1104 
   1105     def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
   1106         """Write a file into the archive.  The contents is the string
   1107         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
   1108         the name of the file in the archive."""
   1109         if not isinstance(zinfo_or_arcname, ZipInfo):
   1110             zinfo = ZipInfo(filename=zinfo_or_arcname,
   1111                             date_time=time.localtime(time.time())[:6])
   1112 
   1113             zinfo.compress_type = self.compression
   1114             zinfo.external_attr = 0600 << 16
   1115         else:
   1116             zinfo = zinfo_or_arcname
   1117 
   1118         if not self.fp:
   1119             raise RuntimeError(
   1120                   "Attempt to write to ZIP archive that was already closed")
   1121 
   1122         if compress_type is not None:
   1123             zinfo.compress_type = compress_type
   1124 
   1125         zinfo.file_size = len(bytes)            # Uncompressed size

   1126         zinfo.header_offset = self.fp.tell()    # Start of header bytes

   1127         self._writecheck(zinfo)
   1128         self._didModify = True
   1129         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum

   1130         if zinfo.compress_type == ZIP_DEFLATED:
   1131             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
   1132                  zlib.DEFLATED, -15)
   1133             bytes = co.compress(bytes) + co.flush()
   1134             zinfo.compress_size = len(bytes)    # Compressed size

   1135         else:
   1136             zinfo.compress_size = zinfo.file_size
   1137         zinfo.header_offset = self.fp.tell()    # Start of header bytes

   1138         self.fp.write(zinfo.FileHeader())
   1139         self.fp.write(bytes)
   1140         self.fp.flush()
   1141         if zinfo.flag_bits & 0x08:
   1142             # Write CRC and file sizes after the file data

   1143             self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
   1144                   zinfo.file_size))
   1145         self.filelist.append(zinfo)
   1146         self.NameToInfo[zinfo.filename] = zinfo
   1147 
   1148     def __del__(self):
   1149         """Call the "close()" method in case the user forgot."""
   1150         self.close()
   1151 
   1152     def close(self):
   1153         """Close the file, and for mode "w" and "a" write the ending
   1154         records."""
   1155         if self.fp is None:
   1156             return
   1157 
   1158         if self.mode in ("w", "a") and self._didModify: # write ending records

   1159             count = 0
   1160             pos1 = self.fp.tell()
   1161             for zinfo in self.filelist:         # write central directory

   1162                 count = count + 1
   1163                 dt = zinfo.date_time
   1164                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
   1165                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
   1166                 extra = []
   1167                 if zinfo.file_size > ZIP64_LIMIT \
   1168                         or zinfo.compress_size > ZIP64_LIMIT:
   1169                     extra.append(zinfo.file_size)
   1170                     extra.append(zinfo.compress_size)
   1171                     file_size = 0xffffffff
   1172                     compress_size = 0xffffffff
   1173                 else:
   1174                     file_size = zinfo.file_size
   1175                     compress_size = zinfo.compress_size
   1176 
   1177                 if zinfo.header_offset > ZIP64_LIMIT:
   1178                     extra.append(zinfo.header_offset)
   1179                     header_offset = 0xffffffffL
   1180                 else:
   1181                     header_offset = zinfo.header_offset
   1182 
   1183                 extra_data = zinfo.extra
   1184                 if extra:
   1185                     # Append a ZIP64 field to the extra's

   1186                     extra_data = struct.pack(
   1187                             '<HH' + 'Q'*len(extra),
   1188                             1, 8*len(extra), *extra) + extra_data
   1189 
   1190                     extract_version = max(45, zinfo.extract_version)
   1191                     create_version = max(45, zinfo.create_version)
   1192                 else:
   1193                     extract_version = zinfo.extract_version
   1194                     create_version = zinfo.create_version
   1195 
   1196                 try:
   1197                     filename, flag_bits = zinfo._encodeFilenameFlags()
   1198                     centdir = struct.pack(structCentralDir,
   1199                      stringCentralDir, create_version,
   1200                      zinfo.create_system, extract_version, zinfo.reserved,
   1201                      flag_bits, zinfo.compress_type, dostime, dosdate,
   1202                      zinfo.CRC, compress_size, file_size,
   1203                      len(filename), len(extra_data), len(zinfo.comment),
   1204                      0, zinfo.internal_attr, zinfo.external_attr,
   1205                      header_offset)
   1206                 except DeprecationWarning:
   1207                     print >>sys.stderr, (structCentralDir,
   1208                      stringCentralDir, create_version,
   1209                      zinfo.create_system, extract_version, zinfo.reserved,
   1210                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
   1211                      zinfo.CRC, compress_size, file_size,
   1212                      len(zinfo.filename), len(extra_data), len(zinfo.comment),
   1213                      0, zinfo.internal_attr, zinfo.external_attr,
   1214                      header_offset)
   1215                     raise
   1216                 self.fp.write(centdir)
   1217                 self.fp.write(filename)
   1218                 self.fp.write(extra_data)
   1219                 self.fp.write(zinfo.comment)
   1220 
   1221             pos2 = self.fp.tell()
   1222             # Write end-of-zip-archive record

   1223             centDirCount = count
   1224             centDirSize = pos2 - pos1
   1225             centDirOffset = pos1
   1226             if (centDirCount >= ZIP_FILECOUNT_LIMIT or
   1227                 centDirOffset > ZIP64_LIMIT or
   1228                 centDirSize > ZIP64_LIMIT):
   1229                 # Need to write the ZIP64 end-of-archive records

   1230                 zip64endrec = struct.pack(
   1231                         structEndArchive64, stringEndArchive64,
   1232                         44, 45, 45, 0, 0, centDirCount, centDirCount,
   1233                         centDirSize, centDirOffset)
   1234                 self.fp.write(zip64endrec)
   1235 
   1236                 zip64locrec = struct.pack(
   1237                         structEndArchive64Locator,
   1238                         stringEndArchive64Locator, 0, pos2, 1)
   1239                 self.fp.write(zip64locrec)
   1240                 centDirCount = min(centDirCount, 0xFFFF)
   1241                 centDirSize = min(centDirSize, 0xFFFFFFFF)
   1242                 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
   1243 
   1244             # check for valid comment length

   1245             if len(self.comment) >= ZIP_MAX_COMMENT:
   1246                 if self.debug > 0:
   1247                     msg = 'Archive comment is too long; truncating to %d bytes' \
   1248                           % ZIP_MAX_COMMENT
   1249                 self.comment = self.comment[:ZIP_MAX_COMMENT]
   1250 
   1251             endrec = struct.pack(structEndArchive, stringEndArchive,
   1252                                  0, 0, centDirCount, centDirCount,
   1253                                  centDirSize, centDirOffset, len(self.comment))
   1254             self.fp.write(endrec)
   1255             self.fp.write(self.comment)
   1256             self.fp.flush()
   1257 
   1258         if not self._filePassed:
   1259             self.fp.close()
   1260         self.fp = None
   1261 
   1262 
   1263 class PyZipFile(ZipFile):
   1264     """Class to create ZIP archives with Python library files and packages."""
   1265 
   1266     def writepy(self, pathname, basename = ""):
   1267         """Add all files from "pathname" to the ZIP archive.
   1268 
   1269         If pathname is a package directory, search the directory and
   1270         all package subdirectories recursively for all *.py and enter
   1271         the modules into the archive.  If pathname is a plain
   1272         directory, listdir *.py and enter all modules.  Else, pathname
   1273         must be a Python *.py file and the module will be put into the
   1274         archive.  Added modules are always module.pyo or module.pyc.
   1275         This method will compile the module.py into module.pyc if
   1276         necessary.
   1277         """
   1278         dir, name = os.path.split(pathname)
   1279         if os.path.isdir(pathname):
   1280             initname = os.path.join(pathname, "__init__.py")
   1281             if os.path.isfile(initname):
   1282                 # This is a package directory, add it

   1283                 if basename:
   1284                     basename = "%s/%s" % (basename, name)
   1285                 else:
   1286                     basename = name
   1287                 if self.debug:
   1288                     print "Adding package in", pathname, "as", basename
   1289                 fname, arcname = self._get_codename(initname[0:-3], basename)
   1290                 if self.debug:
   1291                     print "Adding", arcname
   1292                 self.write(fname, arcname)
   1293                 dirlist = os.listdir(pathname)
   1294                 dirlist.remove("__init__.py")
   1295                 # Add all *.py files and package subdirectories

   1296                 for filename in dirlist:
   1297                     path = os.path.join(pathname, filename)
   1298                     root, ext = os.path.splitext(filename)
   1299                     if os.path.isdir(path):
   1300                         if os.path.isfile(os.path.join(path, "__init__.py")):
   1301                             # This is a package directory, add it

   1302                             self.writepy(path, basename)  # Recursive call

   1303                     elif ext == ".py":
   1304                         fname, arcname = self._get_codename(path[0:-3],
   1305                                          basename)
   1306                         if self.debug:
   1307                             print "Adding", arcname
   1308                         self.write(fname, arcname)
   1309             else:
   1310                 # This is NOT a package directory, add its files at top level

   1311                 if self.debug:
   1312                     print "Adding files from directory", pathname
   1313                 for filename in os.listdir(pathname):
   1314                     path = os.path.join(pathname, filename)
   1315                     root, ext = os.path.splitext(filename)
   1316                     if ext == ".py":
   1317                         fname, arcname = self._get_codename(path[0:-3],
   1318                                          basename)
   1319                         if self.debug:
   1320                             print "Adding", arcname
   1321                         self.write(fname, arcname)
   1322         else:
   1323             if pathname[-3:] != ".py":
   1324                 raise RuntimeError, \
   1325                       'Files added with writepy() must end with ".py"'
   1326             fname, arcname = self._get_codename(pathname[0:-3], basename)
   1327             if self.debug:
   1328                 print "Adding file", arcname
   1329             self.write(fname, arcname)
   1330 
   1331     def _get_codename(self, pathname, basename):
   1332         """Return (filename, archivename) for the path.
   1333 
   1334         Given a module name path, return the correct file path and
   1335         archive name, compiling if necessary.  For example, given
   1336         /python/lib/string, return (/python/lib/string.pyc, string).
   1337         """
   1338         file_py  = pathname + ".py"
   1339         file_pyc = pathname + ".pyc"
   1340         file_pyo = pathname + ".pyo"
   1341         if os.path.isfile(file_pyo) and \
   1342                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
   1343             fname = file_pyo    # Use .pyo file

   1344         elif not os.path.isfile(file_pyc) or \
   1345              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
   1346             import py_compile
   1347             if self.debug:
   1348                 print "Compiling", file_py
   1349             try:
   1350                 py_compile.compile(file_py, file_pyc, None, True)
   1351             except py_compile.PyCompileError,err:
   1352                 print err.msg
   1353             fname = file_pyc
   1354         else:
   1355             fname = file_pyc
   1356         archivename = os.path.split(fname)[1]
   1357         if basename:
   1358             archivename = "%s/%s" % (basename, archivename)
   1359         return (fname, archivename)
   1360 
   1361 
   1362 def main(args = None):
   1363     import textwrap
   1364     USAGE=textwrap.dedent("""\
   1365         Usage:
   1366             zipfile.py -l zipfile.zip        # Show listing of a zipfile
   1367             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
   1368             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
   1369             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
   1370         """)
   1371     if args is None:
   1372         args = sys.argv[1:]
   1373 
   1374     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
   1375         print USAGE
   1376         sys.exit(1)
   1377 
   1378     if args[0] == '-l':
   1379         if len(args) != 2:
   1380             print USAGE
   1381             sys.exit(1)
   1382         zf = ZipFile(args[1], 'r')
   1383         zf.printdir()
   1384         zf.close()
   1385 
   1386     elif args[0] == '-t':
   1387         if len(args) != 2:
   1388             print USAGE
   1389             sys.exit(1)
   1390         zf = ZipFile(args[1], 'r')
   1391         badfile = zf.testzip()
   1392         if badfile:
   1393             print("The following enclosed file is corrupted: {!r}".format(badfile))
   1394         print "Done testing"
   1395 
   1396     elif args[0] == '-e':
   1397         if len(args) != 3:
   1398             print USAGE
   1399             sys.exit(1)
   1400 
   1401         zf = ZipFile(args[1], 'r')
   1402         out = args[2]
   1403         for path in zf.namelist():
   1404             if path.startswith('./'):
   1405                 tgt = os.path.join(out, path[2:])
   1406             else:
   1407                 tgt = os.path.join(out, path)
   1408 
   1409             tgtdir = os.path.dirname(tgt)
   1410             if not os.path.exists(tgtdir):
   1411                 os.makedirs(tgtdir)
   1412             with open(tgt, 'wb') as fp:
   1413                 fp.write(zf.read(path))
   1414         zf.close()
   1415 
   1416     elif args[0] == '-c':
   1417         if len(args) < 3:
   1418             print USAGE
   1419             sys.exit(1)
   1420 
   1421         def addToZip(zf, path, zippath):
   1422             if os.path.isfile(path):
   1423                 zf.write(path, zippath, ZIP_DEFLATED)
   1424             elif os.path.isdir(path):
   1425                 for nm in os.listdir(path):
   1426                     addToZip(zf,
   1427                             os.path.join(path, nm), os.path.join(zippath, nm))
   1428             # else: ignore

   1429 
   1430         zf = ZipFile(args[1], 'w', allowZip64=True)
   1431         for src in args[2:]:
   1432             addToZip(zf, src, os.path.basename(src))
   1433 
   1434         zf.close()
   1435 
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()
   1438