Home | History | Annotate | Download | only in Lib
      1 """
      2 Read and write ZIP files.
      3 
      4 XXX references to utf-8 need further investigation.
      5 """
      6 import io
      7 import os
      8 import re
      9 import importlib.util
     10 import sys
     11 import time
     12 import stat
     13 import shutil
     14 import struct
     15 import binascii
     16 
     17 try:
     18     import threading
     19 except ImportError:
     20     import dummy_threading as threading
     21 
     22 try:
     23     import zlib # We may need its compression method
     24     crc32 = zlib.crc32
     25 except ImportError:
     26     zlib = None
     27     crc32 = binascii.crc32
     28 
     29 try:
     30     import bz2 # We may need its compression method
     31 except ImportError:
     32     bz2 = None
     33 
     34 try:
     35     import lzma # We may need its compression method
     36 except ImportError:
     37     lzma = None
     38 
# Names exported by a star-import of this module.  "BadZipfile" and "error"
# are legacy aliases of BadZipFile (bound further down in this file).
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
     42 
     43 class BadZipFile(Exception):
     44     pass
     45 
     46 
     47 class LargeZipFile(Exception):
     48     """
     49     Raised when writing a zipfile, the zipfile requires ZIP64 extensions
     50     and those extensions are disabled.
     51     """
     52 
     53 error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
     54 
     55 
# Sizes above ZIP64_LIMIT (2**31 - 1) make ZipInfo.FileHeader switch to the
# ZIP64 extra record (or raise LargeZipFile when ZIP64 is not allowed).
ZIP64_LIMIT = (1 << 31) - 1
# Both of the following equal 65535, the largest value of a 16-bit field.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Version numbers written into headers: the default, and the minimum
# versions that ZipInfo.FileHeader enforces for ZIP64, bzip2 and LZMA members.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
     73 
     74 # Below are some formats and associated data for reading/writing headers using
     75 # the struct module.  The names and structures of headers/records are those used
     76 # in the PKWARE description of the ZIP file format:
     77 #     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
     78 # (URL valid as of January 2008)
     79 
     80 # The "end of central directory" structure, magic number, size, and indices
     81 # (section V.I in the format document)
# struct layout (little-endian): 4-byte signature, four 16-bit fields, two
# 32-bit fields and one 16-bit field -- eight items, matching the
# _ECD_SIGNATURE.._ECD_COMMENT_SIZE indices below.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
     98 
     99 # The "central directory" structure, magic number, size, and indices
    100 # of entries in the structure (section V.F in the format document)
# struct format, magic number and packed size (in bytes) of one central
# directory entry; the format unpacks to 19 items indexed by _CD_* below.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
    125 
    126 # The "local file header" structure, magic number, size, and indices
    127 # (section V.A in the format document)
# struct format, magic number and packed size (in bytes) of a local file
# header; ZipInfo.FileHeader packs its fields with this format.  The format
# unpacks to 12 items indexed by _FH_* below.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
    144 
    145 # The "Zip64 end of central directory locator" structure, magic number, and size
# Unpacked by _EndRecData64 as (signature, disk number, relative offset,
# total number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
    149 
    150 # The "Zip64 end of central directory" record, magic number, size, and indices
    151 # (section V.G in the format document)
# struct format, magic number and packed size (in bytes) of the ZIP64
# end-of-central-directory record; the format unpacks to 10 items indexed by
# _CD64_* below.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
    165 _CD64_OFFSET_START_CENTDIR = 9
    166 
    167 def _check_zipfile(fp):
    168     try:
    169         if _EndRecData(fp):
    170             return True         # file has correct magic number
    171     except OSError:
    172         pass
    173     return False
    174 
    175 def is_zipfile(filename):
    176     """Quickly see if a file is a ZIP file by checking the magic number.
    177 
    178     The filename argument may be a file or file-like object too.
    179     """
    180     result = False
    181     try:
    182         if hasattr(filename, "read"):
    183             result = _check_zipfile(fp=filename)
    184         else:
    185             with open(filename, "rb") as fp:
    186                 result = _check_zipfile(fp)
    187     except OSError:
    188         pass
    189     return result
    190 
    191 def _EndRecData64(fpin, offset, endrec):
    192     """
    193     Read the ZIP64 end-of-archive records and use that to update endrec
    194     """
    195     try:
    196         fpin.seek(offset - sizeEndCentDir64Locator, 2)
    197     except OSError:
    198         # If the seek fails, the file is not large enough to contain a ZIP64
    199         # end-of-archive record, so just return the end record we were given.
    200         return endrec
    201 
    202     data = fpin.read(sizeEndCentDir64Locator)
    203     if len(data) != sizeEndCentDir64Locator:
    204         return endrec
    205     sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    206     if sig != stringEndArchive64Locator:
    207         return endrec
    208 
    209     if diskno != 0 or disks != 1:
    210         raise BadZipFile("zipfiles that span multiple disks are not supported")
    211 
    212     # Assume no 'zip64 extensible data'
    213     fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    214     data = fpin.read(sizeEndCentDir64)
    215     if len(data) != sizeEndCentDir64:
    216         return endrec
    217     sig, sz, create_version, read_version, disk_num, disk_dir, \
    218         dircount, dircount2, dirsize, diroffset = \
    219         struct.unpack(structEndArchive64, data)
    220     if sig != stringEndArchive64:
    221         return endrec
    222 
    223     # Update the original endrec using data from the ZIP64 record
    224     endrec[_ECD_SIGNATURE] = sig
    225     endrec[_ECD_DISK_NUMBER] = disk_num
    226     endrec[_ECD_DISK_START] = disk_dir
    227     endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    228     endrec[_ECD_ENTRIES_TOTAL] = dircount2
    229     endrec[_ECD_SIZE] = dirsize
    230     endrec[_ECD_OFFSET] = diroffset
    231     return endrec
    232 
    233 
    234 def _EndRecData(fpin):
    235     """Return data from the "End of Central Directory" record, or None.
    236 
    237     The data is a list of the nine items in the ZIP "End of central dir"
    238     record followed by a tenth item, the file seek offset of this record."""
    239 
    240     # Determine file size
    241     fpin.seek(0, 2)
    242     filesize = fpin.tell()
    243 
    244     # Check to see if this is ZIP file with no archive comment (the
    245     # "end of central directory" structure should be the last item in the
    246     # file if this is the case).
    247     try:
    248         fpin.seek(-sizeEndCentDir, 2)
    249     except OSError:
    250         return None
    251     data = fpin.read()
    252     if (len(data) == sizeEndCentDir and
    253         data[0:4] == stringEndArchive and
    254         data[-2:] == b"\000\000"):
    255         # the signature is correct and there's no comment, unpack structure
    256         endrec = struct.unpack(structEndArchive, data)
    257         endrec=list(endrec)
    258 
    259         # Append a blank comment and record start offset
    260         endrec.append(b"")
    261         endrec.append(filesize - sizeEndCentDir)
    262 
    263         # Try to read the "Zip64 end of central directory" structure
    264         return _EndRecData64(fpin, -sizeEndCentDir, endrec)
    265 
    266     # Either this is not a ZIP file, or it is a ZIP file with an archive
    267     # comment.  Search the end of the file for the "end of central directory"
    268     # record signature. The comment is the last item in the ZIP file and may be
    269     # up to 64K long.  It is assumed that the "end of central directory" magic
    270     # number does not appear in the comment.
    271     maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    272     fpin.seek(maxCommentStart, 0)
    273     data = fpin.read()
    274     start = data.rfind(stringEndArchive)
    275     if start >= 0:
    276         # found the magic number; attempt to unpack and interpret
    277         recData = data[start:start+sizeEndCentDir]
    278         if len(recData) != sizeEndCentDir:
    279             # Zip file is corrupted.
    280             return None
    281         endrec = list(struct.unpack(structEndArchive, recData))
    282         commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
    283         comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
    284         endrec.append(comment)
    285         endrec.append(maxCommentStart + start)
    286 
    287         # Try to read the "Zip64 end of central directory" structure
    288         return _EndRecData64(fpin, maxCommentStart + start - filesize,
    289                              endrec)
    290 
    291     # Unable to find a valid end of central directory structure
    292     return None
    293 
    294 
    295 class ZipInfo (object):
    296     """Class with attributes describing each file in the ZIP archive."""
    297 
    298     __slots__ = (
    299         'orig_filename',
    300         'filename',
    301         'date_time',
    302         'compress_type',
    303         'comment',
    304         'extra',
    305         'create_system',
    306         'create_version',
    307         'extract_version',
    308         'reserved',
    309         'flag_bits',
    310         'volume',
    311         'internal_attr',
    312         'external_attr',
    313         'header_offset',
    314         'CRC',
    315         'compress_size',
    316         'file_size',
    317         '_raw_time',
    318     )
    319 
    320     def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
    321         self.orig_filename = filename   # Original file name in archive
    322 
    323         # Terminate the file name at the first null byte.  Null bytes in file
    324         # names are used as tricks by viruses in archives.
    325         null_byte = filename.find(chr(0))
    326         if null_byte >= 0:
    327             filename = filename[0:null_byte]
    328         # This is used to ensure paths in generated ZIP files always use
    329         # forward slashes as the directory separator, as required by the
    330         # ZIP format specification.
    331         if os.sep != "/" and os.sep in filename:
    332             filename = filename.replace(os.sep, "/")
    333 
    334         self.filename = filename        # Normalized file name
    335         self.date_time = date_time      # year, month, day, hour, min, sec
    336 
    337         if date_time[0] < 1980:
    338             raise ValueError('ZIP does not support timestamps before 1980')
    339 
    340         # Standard values:
    341         self.compress_type = ZIP_STORED # Type of compression for the file
    342         self.comment = b""              # Comment for each file
    343         self.extra = b""                # ZIP extra data
    344         if sys.platform == 'win32':
    345             self.create_system = 0          # System which created ZIP archive
    346         else:
    347             # Assume everything else is unix-y
    348             self.create_system = 3          # System which created ZIP archive
    349         self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
    350         self.extract_version = DEFAULT_VERSION # Version needed to extract archive
    351         self.reserved = 0               # Must be zero
    352         self.flag_bits = 0              # ZIP flag bits
    353         self.volume = 0                 # Volume number of file header
    354         self.internal_attr = 0          # Internal attributes
    355         self.external_attr = 0          # External file attributes
    356         # Other attributes are set by class ZipFile:
    357         # header_offset         Byte offset to the file header
    358         # CRC                   CRC-32 of the uncompressed file
    359         # compress_size         Size of the compressed file
    360         # file_size             Size of the uncompressed file
    361 
    362     def __repr__(self):
    363         result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
    364         if self.compress_type != ZIP_STORED:
    365             result.append(' compress_type=%s' %
    366                           compressor_names.get(self.compress_type,
    367                                                self.compress_type))
    368         hi = self.external_attr >> 16
    369         lo = self.external_attr & 0xFFFF
    370         if hi:
    371             result.append(' filemode=%r' % stat.filemode(hi))
    372         if lo:
    373             result.append(' external_attr=%#x' % lo)
    374         isdir = self.is_dir()
    375         if not isdir or self.file_size:
    376             result.append(' file_size=%r' % self.file_size)
    377         if ((not isdir or self.compress_size) and
    378             (self.compress_type != ZIP_STORED or
    379              self.file_size != self.compress_size)):
    380             result.append(' compress_size=%r' % self.compress_size)
    381         result.append('>')
    382         return ''.join(result)
    383 
    384     def FileHeader(self, zip64=None):
    385         """Return the per-file header as a string."""
    386         dt = self.date_time
    387         dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
    388         dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
    389         if self.flag_bits & 0x08:
    390             # Set these to zero because we write them after the file data
    391             CRC = compress_size = file_size = 0
    392         else:
    393             CRC = self.CRC
    394             compress_size = self.compress_size
    395             file_size = self.file_size
    396 
    397         extra = self.extra
    398 
    399         min_version = 0
    400         if zip64 is None:
    401             zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
    402         if zip64:
    403             fmt = '<HHQQ'
    404             extra = extra + struct.pack(fmt,
    405                                         1, struct.calcsize(fmt)-4, file_size, compress_size)
    406         if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
    407             if not zip64:
    408                 raise LargeZipFile("Filesize would require ZIP64 extensions")
    409             # File is larger than what fits into a 4 byte integer,
    410             # fall back to the ZIP64 extension
    411             file_size = 0xffffffff
    412             compress_size = 0xffffffff
    413             min_version = ZIP64_VERSION
    414 
    415         if self.compress_type == ZIP_BZIP2:
    416             min_version = max(BZIP2_VERSION, min_version)
    417         elif self.compress_type == ZIP_LZMA:
    418             min_version = max(LZMA_VERSION, min_version)
    419 
    420         self.extract_version = max(min_version, self.extract_version)
    421         self.create_version = max(min_version, self.create_version)
    422         filename, flag_bits = self._encodeFilenameFlags()
    423         header = struct.pack(structFileHeader, stringFileHeader,
    424                              self.extract_version, self.reserved, flag_bits,
    425                              self.compress_type, dostime, dosdate, CRC,
    426                              compress_size, file_size,
    427                              len(filename), len(extra))
    428         return header + filename + extra
    429 
    430     def _encodeFilenameFlags(self):
    431         try:
    432             return self.filename.encode('ascii'), self.flag_bits
    433         except UnicodeEncodeError:
    434             return self.filename.encode('utf-8'), self.flag_bits | 0x800
    435 
    436     def _decodeExtra(self):
    437         # Try to decode the extra field.
    438         extra = self.extra
    439         unpack = struct.unpack
    440         while len(extra) >= 4:
    441             tp, ln = unpack('<HH', extra[:4])
    442             if tp == 1:
    443                 if ln >= 24:
    444                     counts = unpack('<QQQ', extra[4:28])
    445                 elif ln == 16:
    446                     counts = unpack('<QQ', extra[4:20])
    447                 elif ln == 8:
    448                     counts = unpack('<Q', extra[4:12])
    449                 elif ln == 0:
    450                     counts = ()
    451                 else:
    452                     raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
    453 
    454                 idx = 0
    455 
    456                 # ZIP64 extension (large files and/or large archives)
    457                 if self.file_size in (0xffffffffffffffff, 0xffffffff):
    458                     self.file_size = counts[idx]
    459                     idx += 1
    460 
    461                 if self.compress_size == 0xFFFFFFFF:
    462                     self.compress_size = counts[idx]
    463                     idx += 1
    464 
    465                 if self.header_offset == 0xffffffff:
    466                     old = self.header_offset
    467                     self.header_offset = counts[idx]
    468                     idx+=1
    469 
    470             extra = extra[ln+4:]
    471 
    472     @classmethod
    473     def from_file(cls, filename, arcname=None):
    474         """Construct an appropriate ZipInfo for a file on the filesystem.
    475 
    476         filename should be the path to a file or directory on the filesystem.
    477 
    478         arcname is the name which it will have within the archive (by default,
    479         this will be the same as filename, but without a drive letter and with
    480         leading path separators removed).
    481         """
    482         st = os.stat(filename)
    483         isdir = stat.S_ISDIR(st.st_mode)
    484         mtime = time.localtime(st.st_mtime)
    485         date_time = mtime[0:6]
    486         # Create ZipInfo instance to store file information
    487         if arcname is None:
    488             arcname = filename
    489         arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    490         while arcname[0] in (os.sep, os.altsep):
    491             arcname = arcname[1:]
    492         if isdir:
    493             arcname += '/'
    494         zinfo = cls(arcname, date_time)
    495         zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
    496         if isdir:
    497             zinfo.file_size = 0
    498             zinfo.external_attr |= 0x10  # MS-DOS directory flag
    499         else:
    500             zinfo.file_size = st.st_size
    501 
    502         return zinfo
    503 
    504     def is_dir(self):
    505         """Return True if this archive member is a directory."""
    506         return self.filename[-1] == '/'
    507 
    508 
    509 class _ZipDecrypter:
    510     """Class to handle decryption of files stored within a ZIP archive.
    511 
    512     ZIP supports a password-based form of encryption. Even though known
    513     plaintext attacks have been found against it, it is still useful
    514     to be able to get data out of such a file.
    515 
    516     Usage:
    517         zd = _ZipDecrypter(mypwd)
    518         plain_char = zd(cypher_char)
    519         plain_text = map(zd, cypher_text)
    520     """
    521 
    522     def _GenerateCRCTable():
    523         """Generate a CRC-32 table.
    524 
    525         ZIP encryption uses the CRC32 one-byte primitive for scrambling some
    526         internal keys. We noticed that a direct implementation is faster than
    527         relying on binascii.crc32().
    528         """
    529         poly = 0xedb88320
    530         table = [0] * 256
    531         for i in range(256):
    532             crc = i
    533             for j in range(8):
    534                 if crc & 1:
    535                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
    536                 else:
    537                     crc = ((crc >> 1) & 0x7FFFFFFF)
    538             table[i] = crc
    539         return table
    540     crctable = None
    541 
    542     def _crc32(self, ch, crc):
    543         """Compute the CRC32 primitive on one byte."""
    544         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
    545 
    546     def __init__(self, pwd):
    547         if _ZipDecrypter.crctable is None:
    548             _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
    549         self.key0 = 305419896
    550         self.key1 = 591751049
    551         self.key2 = 878082192
    552         for p in pwd:
    553             self._UpdateKeys(p)
    554 
    555     def _UpdateKeys(self, c):
    556         self.key0 = self._crc32(c, self.key0)
    557         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
    558         self.key1 = (self.key1 * 134775813 + 1) & 4294967295
    559         self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
    560 
    561     def __call__(self, c):
    562         """Decrypt a single character."""
    563         assert isinstance(c, int)
    564         k = self.key2 | 2
    565         c = c ^ (((k * (k^1)) >> 8) & 255)
    566         self._UpdateKeys(c)
    567         return c
    568 
    569 
    570 class LZMACompressor:
    571 
    572     def __init__(self):
    573         self._comp = None
    574 
    575     def _init(self):
    576         props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
    577         self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
    578             lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
    579         ])
    580         return struct.pack('<BBH', 9, 4, len(props)) + props
    581 
    582     def compress(self, data):
    583         if self._comp is None:
    584             return self._init() + self._comp.compress(data)
    585         return self._comp.compress(data)
    586 
    587     def flush(self):
    588         if self._comp is None:
    589             return self._init() + self._comp.flush()
    590         return self._comp.flush()
    591 
    592 
    593 class LZMADecompressor:
    594 
    595     def __init__(self):
    596         self._decomp = None
    597         self._unconsumed = b''
    598         self.eof = False
    599 
    600     def decompress(self, data):
    601         if self._decomp is None:
    602             self._unconsumed += data
    603             if len(self._unconsumed) <= 4:
    604                 return b''
    605             psize, = struct.unpack('<H', self._unconsumed[2:4])
    606             if len(self._unconsumed) <= 4 + psize:
    607                 return b''
    608 
    609             self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
    610                 lzma._decode_filter_properties(lzma.FILTER_LZMA1,
    611                                                self._unconsumed[4:4 + psize])
    612             ])
    613             data = self._unconsumed[4 + psize:]
    614             del self._unconsumed
    615 
    616         result = self._decomp.decompress(data)
    617         self.eof = self._decomp.eof
    618         return result
    619 
    620 
# Maps a ZIP compression-method number to a short human-readable name.  Used
# for repr() output and for the "unsupported compression type" error message;
# most of these methods cannot actually be decompressed by this module.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
    640 
    641 def _check_compression(compression):
    642     if compression == ZIP_STORED:
    643         pass
    644     elif compression == ZIP_DEFLATED:
    645         if not zlib:
    646             raise RuntimeError(
    647                 "Compression requires the (missing) zlib module")
    648     elif compression == ZIP_BZIP2:
    649         if not bz2:
    650             raise RuntimeError(
    651                 "Compression requires the (missing) bz2 module")
    652     elif compression == ZIP_LZMA:
    653         if not lzma:
    654             raise RuntimeError(
    655                 "Compression requires the (missing) lzma module")
    656     else:
    657         raise NotImplementedError("That compression method is not supported")
    658 
    659 
    660 def _get_compressor(compress_type):
    661     if compress_type == ZIP_DEFLATED:
    662         return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
    663                                 zlib.DEFLATED, -15)
    664     elif compress_type == ZIP_BZIP2:
    665         return bz2.BZ2Compressor()
    666     elif compress_type == ZIP_LZMA:
    667         return LZMACompressor()
    668     else:
    669         return None
    670 
    671 
    672 def _get_decompressor(compress_type):
    673     if compress_type == ZIP_STORED:
    674         return None
    675     elif compress_type == ZIP_DEFLATED:
    676         return zlib.decompressobj(-15)
    677     elif compress_type == ZIP_BZIP2:
    678         return bz2.BZ2Decompressor()
    679     elif compress_type == ZIP_LZMA:
    680         return LZMADecompressor()
    681     else:
    682         descr = compressor_names.get(compress_type)
    683         if descr:
    684             raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
    685         else:
    686             raise NotImplementedError("compression type %d" % (compress_type,))
    687 
    688 
    689 class _SharedFile:
    690     def __init__(self, file, pos, close, lock, writing):
    691         self._file = file
    692         self._pos = pos
    693         self._close = close
    694         self._lock = lock
    695         self._writing = writing
    696 
    697     def read(self, n=-1):
    698         with self._lock:
    699             if self._writing():
    700                 raise ValueError("Can't read from the ZIP file while there "
    701                         "is an open writing handle on it. "
    702                         "Close the writing handle before trying to read.")
    703             self._file.seek(self._pos)
    704             data = self._file.read(n)
    705             self._pos = self._file.tell()
    706             return data
    707 
    708     def close(self):
    709         if self._file is not None:
    710             fileobj = self._file
    711             self._file = None
    712             self._close(fileobj)
    713 
    714 # Provide the tell method for unseekable stream
    715 class _Tellable:
    716     def __init__(self, fp):
    717         self.fp = fp
    718         self.offset = 0
    719 
    720     def write(self, data):
    721         n = self.fp.write(data)
    722         self.offset += n
    723         return n
    724 
    725     def tell(self):
    726         return self.offset
    727 
    728     def flush(self):
    729         self.fp.flush()
    730 
    731     def close(self):
    732         self.fp.close()
    733 
    734 
    735 class ZipExtFile(io.BufferedIOBase):
    736     """File-like object for reading an archive member.
    737        Is returned by ZipFile.open().
    738     """
    739 
    # Max size supported by decompressor.  The parentheses matter: without
    # them "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096
    745 
    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        fileobj       -- object the member's raw data is read from.
        mode          -- stored as the ``mode`` attribute.
        zipinfo       -- supplies compression type, sizes, name and CRC.
        decrypter     -- optional callable used to decrypt the raw data.
        close_fileobj -- stored as ``_close_fileobj``.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        # Byte counters: compressed bytes still to fetch, plain bytes left.
        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        # Read-buffer state.
        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        # A running CRC is only kept when there is a value to compare with.
        if not hasattr(zipinfo, 'CRC'):
            self._expected_crc = None
        else:
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
    777 
    def __repr__(self):
        """Describe the file: name, mode and compression, or "[closed]"."""
        cls = self.__class__
        text = '<%s.%s' % (cls.__module__, cls.__qualname__)
        if self.closed:
            text += ' [closed]'
        else:
            text += ' name=%r mode=%r' % (self.name, self.mode)
            if self._compress_type != ZIP_STORED:
                text += (' compress_type=%s' %
                         compressor_names.get(self._compress_type,
                                              self._compress_type))
        return text + '>'
    791 
    792     def readline(self, limit=-1):
    793         """Read and return a line from the stream.
    794 
    795         If limit is specified, at most limit bytes will be read.
    796         """
    797 
    798         if limit < 0:
    799             # Shortcut common case - newline found in buffer.
    800             i = self._readbuffer.find(b'\n', self._offset) + 1
    801             if i > 0:
    802                 line = self._readbuffer[self._offset: i]
    803                 self._offset = i
    804                 return line
    805 
    806         return io.BufferedIOBase.readline(self, limit)
    807 
    808     def peek(self, n=1):
    809         """Returns buffered bytes without advancing the position."""
    810         if n > len(self._readbuffer) - self._offset:
    811             chunk = self.read(n)
    812             if len(chunk) > self._offset:
    813                 self._readbuffer = chunk + self._readbuffer[self._offset:]
    814                 self._offset = 0
    815             else:
    816                 self._offset -= len(chunk)
    817 
    818         # Return up to 512 bytes to reduce allocation overhead for tight loops.
    819         return self._readbuffer[self._offset: self._offset + 512]
    820 
    821     def readable(self):
    822         return True
    823 
    824     def read(self, n=-1):
    825         """Read and return up to n bytes.
    826         If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
    827         """
    828         if n is None or n < 0:
    829             buf = self._readbuffer[self._offset:]
    830             self._readbuffer = b''
    831             self._offset = 0
    832             while not self._eof:
    833                 buf += self._read1(self.MAX_N)
    834             return buf
    835 
    836         end = n + self._offset
    837         if end < len(self._readbuffer):
    838             buf = self._readbuffer[self._offset:end]
    839             self._offset = end
    840             return buf
    841 
    842         n = end - len(self._readbuffer)
    843         buf = self._readbuffer[self._offset:]
    844         self._readbuffer = b''
    845         self._offset = 0
    846         while n > 0 and not self._eof:
    847             data = self._read1(n)
    848             if n < len(data):
    849                 self._readbuffer = data
    850                 self._offset = n
    851                 buf += data[:n]
    852                 break
    853             buf += data
    854             n -= len(data)
    855         return buf
    856 
    857     def _update_crc(self, newdata):
    858         # Update the CRC using the given data.
    859         if self._expected_crc is None:
    860             # No need to compute the CRC if we don't have a reference value
    861             return
    862         self._running_crc = crc32(newdata, self._running_crc)
    863         # Check the CRC if we're at the end of the file
    864         if self._eof and self._running_crc != self._expected_crc:
    865             raise BadZipFile("Bad CRC-32 for file %r" % self.name)
    866 
    867     def read1(self, n):
    868         """Read up to n bytes with at most one read() system call."""
    869 
    870         if n is None or n < 0:
    871             buf = self._readbuffer[self._offset:]
    872             self._readbuffer = b''
    873             self._offset = 0
    874             while not self._eof:
    875                 data = self._read1(self.MAX_N)
    876                 if data:
    877                     buf += data
    878                     break
    879             return buf
    880 
    881         end = n + self._offset
    882         if end < len(self._readbuffer):
    883             buf = self._readbuffer[self._offset:end]
    884             self._offset = end
    885             return buf
    886 
    887         n = end - len(self._readbuffer)
    888         buf = self._readbuffer[self._offset:]
    889         self._readbuffer = b''
    890         self._offset = 0
    891         if n > 0:
    892             while not self._eof:
    893                 data = self._read1(n)
    894                 if n < len(data):
    895                     self._readbuffer = data
    896                     self._offset = n
    897                     buf += data[:n]
    898                     break
    899                 if data:
    900                     buf += data
    901                     break
    902         return buf
    903 
    904     def _read1(self, n):
    905         # Read up to n compressed bytes with at most one read() system call,
    906         # decrypt and decompress them.
    907         if self._eof or n <= 0:
    908             return b''
    909 
    910         # Read from file.
    911         if self._compress_type == ZIP_DEFLATED:
    912             ## Handle unconsumed data.
    913             data = self._decompressor.unconsumed_tail
    914             if n > len(data):
    915                 data += self._read2(n - len(data))
    916         else:
    917             data = self._read2(n)
    918 
    919         if self._compress_type == ZIP_STORED:
    920             self._eof = self._compress_left <= 0
    921         elif self._compress_type == ZIP_DEFLATED:
    922             n = max(n, self.MIN_READ_SIZE)
    923             data = self._decompressor.decompress(data, n)
    924             self._eof = (self._decompressor.eof or
    925                          self._compress_left <= 0 and
    926                          not self._decompressor.unconsumed_tail)
    927             if self._eof:
    928                 data += self._decompressor.flush()
    929         else:
    930             data = self._decompressor.decompress(data)
    931             self._eof = self._decompressor.eof or self._compress_left <= 0
    932 
    933         data = data[:self._left]
    934         self._left -= len(data)
    935         if self._left <= 0:
    936             self._eof = True
    937         self._update_crc(data)
    938         return data
    939 
    940     def _read2(self, n):
    941         if self._compress_left <= 0:
    942             return b''
    943 
    944         n = max(n, self.MIN_READ_SIZE)
    945         n = min(n, self._compress_left)
    946 
    947         data = self._fileobj.read(n)
    948         self._compress_left -= len(data)
    949         if not data:
    950             raise EOFError
    951 
    952         if self._decrypter is not None:
    953             data = bytes(map(self._decrypter, data))
    954         return data
    955 
    956     def close(self):
    957         try:
    958             if self._close_fileobj:
    959                 self._fileobj.close()
    960         finally:
    961             super().close()
    962 
    963 
    964 class _ZipWriteFile(io.BufferedIOBase):
    965     def __init__(self, zf, zinfo, zip64):
    966         self._zinfo = zinfo
    967         self._zip64 = zip64
    968         self._zipfile = zf
    969         self._compressor = _get_compressor(zinfo.compress_type)
    970         self._file_size = 0
    971         self._compress_size = 0
    972         self._crc = 0
    973 
    974     @property
    975     def _fileobj(self):
    976         return self._zipfile.fp
    977 
    978     def writable(self):
    979         return True
    980 
    981     def write(self, data):
    982         nbytes = len(data)
    983         self._file_size += nbytes
    984         self._crc = crc32(data, self._crc)
    985         if self._compressor:
    986             data = self._compressor.compress(data)
    987             self._compress_size += len(data)
    988         self._fileobj.write(data)
    989         return nbytes
    990 
    991     def close(self):
    992         super().close()
    993         # Flush any data from the compressor, and update header info
    994         if self._compressor:
    995             buf = self._compressor.flush()
    996             self._compress_size += len(buf)
    997             self._fileobj.write(buf)
    998             self._zinfo.compress_size = self._compress_size
    999         else:
   1000             self._zinfo.compress_size = self._file_size
   1001         self._zinfo.CRC = self._crc
   1002         self._zinfo.file_size = self._file_size
   1003 
   1004         # Write updated header info
   1005         if self._zinfo.flag_bits & 0x08:
   1006             # Write CRC and file sizes after the file data
   1007             fmt = '<LQQ' if self._zip64 else '<LLL'
   1008             self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
   1009                 self._zinfo.compress_size, self._zinfo.file_size))
   1010             self._zipfile.start_dir = self._fileobj.tell()
   1011         else:
   1012             if not self._zip64:
   1013                 if self._file_size > ZIP64_LIMIT:
   1014                     raise RuntimeError('File size unexpectedly exceeded ZIP64 '
   1015                                        'limit')
   1016                 if self._compress_size > ZIP64_LIMIT:
   1017                     raise RuntimeError('Compressed size unexpectedly exceeded '
   1018                                        'ZIP64 limit')
   1019             # Seek backwards and write file header (which will now include
   1020             # correct CRC and file sizes)
   1021 
   1022             # Preserve current position in file
   1023             self._zipfile.start_dir = self._fileobj.tell()
   1024             self._fileobj.seek(self._zinfo.header_offset)
   1025             self._fileobj.write(self._zinfo.FileHeader(self._zip64))
   1026             self._fileobj.seek(self._zipfile.start_dir)
   1027 
   1028         self._zipfile._writing = False
   1029 
   1030         # Successfully written: Add file to our caches
   1031         self._zipfile.filelist.append(self._zinfo)
   1032         self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
   1033 
   1034 class ZipFile:
   1035     """ Class with methods to open, read, write, close, list zip files.
   1036 
   1037     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
   1038 
   1039     file: Either the path to the file, or a file-like object.
   1040           If it is a path, the file will be opened and closed by ZipFile.
   1041     mode: The mode can be either read 'r', write 'w', exclusive create 'x',
   1042           or append 'a'.
   1043     compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
   1044                  ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
   1045     allowZip64: if True ZipFile will create files with ZIP64 extensions when
   1046                 needed, otherwise it will raise an exception when this would
   1047                 be necessary.
   1048 
   1049     """
   1050 
   1051     fp = None                   # Set here since __del__ checks it
   1052     _windows_illegal_name_trans_table = None
   1053 
   1054     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
   1055         """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
   1056         or append 'a'."""
   1057         if mode not in ('r', 'w', 'x', 'a'):
   1058             raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
   1059 
   1060         _check_compression(compression)
   1061 
   1062         self._allowZip64 = allowZip64
   1063         self._didModify = False
   1064         self.debug = 0  # Level of printing: 0 through 3
   1065         self.NameToInfo = {}    # Find file info given name
   1066         self.filelist = []      # List of ZipInfo instances for archive
   1067         self.compression = compression  # Method of compression
   1068         self.mode = mode
   1069         self.pwd = None
   1070         self._comment = b''
   1071 
   1072         # Check if we were passed a file-like object
   1073         if isinstance(file, str):
   1074             # No, it's a filename
   1075             self._filePassed = 0
   1076             self.filename = file
   1077             modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
   1078                         'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
   1079             filemode = modeDict[mode]
   1080             while True:
   1081                 try:
   1082                     self.fp = io.open(file, filemode)
   1083                 except OSError:
   1084                     if filemode in modeDict:
   1085                         filemode = modeDict[filemode]
   1086                         continue
   1087                     raise
   1088                 break
   1089         else:
   1090             self._filePassed = 1
   1091             self.fp = file
   1092             self.filename = getattr(file, 'name', None)
   1093         self._fileRefCnt = 1
   1094         self._lock = threading.RLock()
   1095         self._seekable = True
   1096         self._writing = False
   1097 
   1098         try:
   1099             if mode == 'r':
   1100                 self._RealGetContents()
   1101             elif mode in ('w', 'x'):
   1102                 # set the modified flag so central directory gets written
   1103                 # even if no files are added to the archive
   1104                 self._didModify = True
   1105                 self._start_disk = 0
   1106                 try:
   1107                     self.start_dir = self.fp.tell()
   1108                 except (AttributeError, OSError):
   1109                     self.fp = _Tellable(self.fp)
   1110                     self.start_dir = 0
   1111                     self._seekable = False
   1112                 else:
   1113                     # Some file-like objects can provide tell() but not seek()
   1114                     try:
   1115                         self.fp.seek(self.start_dir)
   1116                     except (AttributeError, OSError):
   1117                         self._seekable = False
   1118             elif mode == 'a':
   1119                 try:
   1120                     # See if file is a zip file
   1121                     self._RealGetContents()
   1122                     # seek to start of directory and overwrite
   1123                     self.fp.seek(self.start_dir)
   1124                 except BadZipFile:
   1125                     # file is not a zip file, just append
   1126                     self.fp.seek(0, 2)
   1127 
   1128                     # set the modified flag so central directory gets written
   1129                     # even if no files are added to the archive
   1130                     self._didModify = True
   1131                     self.start_dir = self._start_disk = self.fp.tell()
   1132             else:
   1133                 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
   1134         except:
   1135             fp = self.fp
   1136             self.fp = None
   1137             self._fpclose(fp)
   1138             raise
   1139 
    def __enter__(self):
        """Enter a with-block; the archive object itself is the target."""
        return self
   1142 
    def __exit__(self, type, value, traceback):
        """Leave a with-block by calling close(); the exception details
        passed in are not inspected."""
        self.close()
   1145 
   1146     def __repr__(self):
   1147         result = ['<%s.%s' % (self.__class__.__module__,
   1148                               self.__class__.__qualname__)]
   1149         if self.fp is not None:
   1150             if self._filePassed:
   1151                 result.append(' file=%r' % self.fp)
   1152             elif self.filename is not None:
   1153                 result.append(' filename=%r' % self.filename)
   1154             result.append(' mode=%r' % self.mode)
   1155         else:
   1156             result.append(' [closed]')
   1157         result.append('>')
   1158         return ''.join(result)
   1159 
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record, then parses every
        central directory entry into a ZipInfo that is appended to
        self.filelist and stored in self.NameToInfo.  Also sets
        self._comment, self._start_disk and self.start_dir.

        Raises BadZipFile if the end record cannot be found or read, or
        if a directory entry is truncated or has a bad signature, and
        NotImplementedError if an entry requires an extract version above
        MAX_EXTRACT_VERSION.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # self._start_disk:  Position of the start of ZIP archive
        # It is zero, unless ZIP was concatenated to another file
        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = self._start_disk + offset_cd
            print("given, inferred, offset", offset_cd, inferred, self._start_disk)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + self._start_disk
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on, fp is an in-memory stream over the directory bytes,
        # not the archive file.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Index 5 is the same field that is unpacked into x.flag_bits
            # further down.
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            # t and d are the packed time and date fields, decoded below.
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Make the header offset relative to the start of the file
            # rather than to the start of the archive data.
            x.header_offset = x.header_offset + self._start_disk
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
   1237 
   1238 
   1239     def namelist(self):
   1240         """Return a list of file names in the archive."""
   1241         return [data.filename for data in self.filelist]
   1242 
    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.

        The returned object is self.filelist itself, not a copy.
        """
        return self.filelist
   1247 
   1248     def printdir(self, file=None):
   1249         """Print a table of contents for the zip file."""
   1250         print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
   1251               file=file)
   1252         for zinfo in self.filelist:
   1253             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
   1254             print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
   1255                   file=file)
   1256 
   1257     def testzip(self):
   1258         """Read all the files and check the CRC."""
   1259         chunk_size = 2 ** 20
   1260         for zinfo in self.filelist:
   1261             try:
   1262                 # Read by chunks, to avoid an OverflowError or a
   1263                 # MemoryError with very large embedded files.
   1264                 with self.open(zinfo.filename, "r") as f:
   1265                     while f.read(chunk_size):     # Check CRC-32
   1266                         pass
   1267             except BadZipFile:
   1268                 return zinfo.filename
   1269 
   1270     def getinfo(self, name):
   1271         """Return the instance of ZipInfo given 'name'."""
   1272         info = self.NameToInfo.get(name)
   1273         if info is None:
   1274             raise KeyError(
   1275                 'There is no item named %r in the archive' % name)
   1276 
   1277         return info
   1278 
   1279     def setpassword(self, pwd):
   1280         """Set default password for encrypted files."""
   1281         if pwd and not isinstance(pwd, bytes):
   1282             raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
   1283         if pwd:
   1284             self.pwd = pwd
   1285         else:
   1286             self.pwd = None
   1287 
   1288     @property
   1289     def comment(self):
   1290         """The comment text associated with the ZIP file."""
   1291         return self._comment
   1292 
   1293     @comment.setter
   1294     def comment(self, comment):
   1295         if not isinstance(comment, bytes):
   1296             raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
   1297         # check for valid comment length
   1298         if len(comment) > ZIP_MAX_COMMENT:
   1299             import warnings
   1300             warnings.warn('Archive comment is too long; truncating to %d bytes'
   1301                           % ZIP_MAX_COMMENT, stacklevel=2)
   1302             comment = comment[:ZIP_MAX_COMMENT]
   1303         self._comment = comment
   1304         self._didModify = True
   1305 
   1306     def read(self, name, pwd=None):
   1307         """Return file bytes (as a string) for name."""
   1308         with self.open(name, "r", pwd) as fp:
   1309             return fp.read()
   1310 
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for a bad mode, a password given for writing, a
        closed archive, or a read attempted while a write handle is open;
        TypeError for a non-bytes password; BadZipFile for a truncated or
        inconsistent local file header; NotImplementedError for flag bits
        5 and 6; RuntimeError for a missing or wrong password.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # The new reader holds one more reference to the shared file object.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            # The extra field is read only to move past it.
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The name in the local header must match the directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                # Fall back to the archive-wide default password.
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file time from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file %r" % name)

            return ZipExtFile(zef_file, mode, zinfo, zd, True)
        except:
            # Close the shared-file wrapper on any failure, then re-raise.
            zef_file.close()
            raise
   1423 
   1424     def _open_to_write(self, zinfo, force_zip64=False):
   1425         if force_zip64 and not self._allowZip64:
   1426             raise ValueError(
   1427                 "force_zip64 is True, but allowZip64 was False when opening "
   1428                 "the ZIP file."
   1429             )
   1430         if self._writing:
   1431             raise ValueError("Can't write to the ZIP file while there is "
   1432                              "another write handle open on it. "
   1433                              "Close the first handle before opening another.")
   1434 
   1435         # Sizes and CRC are overwritten with correct data after processing the file
   1436         if not hasattr(zinfo, 'file_size'):
   1437             zinfo.file_size = 0
   1438         zinfo.compress_size = 0
   1439         zinfo.CRC = 0
   1440 
   1441         zinfo.flag_bits = 0x00
   1442         if zinfo.compress_type == ZIP_LZMA:
   1443             # Compressed data includes an end-of-stream (EOS) marker
   1444             zinfo.flag_bits |= 0x02
   1445         if not self._seekable:
   1446             zinfo.flag_bits |= 0x08
   1447 
   1448         if not zinfo.external_attr:
   1449             zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
   1450 
   1451         # Compressed size can be larger than uncompressed size
   1452         zip64 = self._allowZip64 and \
   1453                 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
   1454 
   1455         if self._seekable:
   1456             self.fp.seek(self.start_dir)
   1457         zinfo.header_offset = self.fp.tell()
   1458 
   1459         self._writecheck(zinfo)
   1460         self._didModify = True
   1461 
   1462         self.fp.write(zinfo.FileHeader(zip64))
   1463 
   1464         self._writing = True
   1465         return _ZipWriteFile(self, zinfo, zip64)
   1466 
   1467     def extract(self, member, path=None, pwd=None):
   1468         """Extract a member from the archive to the current working directory,
   1469            using its full name. Its file information is extracted as accurately
   1470            as possible. `member' may be a filename or a ZipInfo object. You can
   1471            specify a different directory using `path'.
   1472         """
   1473         if not isinstance(member, ZipInfo):
   1474             member = self.getinfo(member)
   1475 
   1476         if path is None:
   1477             path = os.getcwd()
   1478 
   1479         return self._extract_member(member, path, pwd)
   1480 
   1481     def extractall(self, path=None, members=None, pwd=None):
   1482         """Extract all members from the archive to the current working
   1483            directory. `path' specifies a different directory to extract to.
   1484            `members' is optional and must be a subset of the list returned
   1485            by namelist().
   1486         """
   1487         if members is None:
   1488             members = self.namelist()
   1489 
   1490         for zipinfo in members:
   1491             self.extract(zipinfo, path, pwd)
   1492 
   1493     @classmethod
   1494     def _sanitize_windows_name(cls, arcname, pathsep):
   1495         """Replace bad characters and remove trailing dots from parts."""
   1496         table = cls._windows_illegal_name_trans_table
   1497         if not table:
   1498             illegal = ':<>|"?*'
   1499             table = str.maketrans(illegal, '_' * len(illegal))
   1500             cls._windows_illegal_name_trans_table = table
   1501         arcname = arcname.translate(table)
   1502         # remove trailing dots
   1503         arcname = (x.rstrip('.') for x in arcname.split(pathsep))
   1504         # rejoin, removing empty parts.
   1505         arcname = pathsep.join(x for x in arcname if x)
   1506         return arcname
   1507 
   1508     def _extract_member(self, member, targetpath, pwd):
   1509         """Extract the ZipInfo object 'member' to a physical
   1510            file on the path targetpath.
   1511         """
   1512         # build the destination pathname, replacing
   1513         # forward slashes to platform specific separators.
   1514         arcname = member.filename.replace('/', os.path.sep)
   1515 
   1516         if os.path.altsep:
   1517             arcname = arcname.replace(os.path.altsep, os.path.sep)
   1518         # interpret absolute pathname as relative, remove drive letter or
   1519         # UNC path, redundant separators, "." and ".." components.
   1520         arcname = os.path.splitdrive(arcname)[1]
   1521         invalid_path_parts = ('', os.path.curdir, os.path.pardir)
   1522         arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
   1523                                    if x not in invalid_path_parts)
   1524         if os.path.sep == '\\':
   1525             # filter illegal characters on Windows
   1526             arcname = self._sanitize_windows_name(arcname, os.path.sep)
   1527 
   1528         targetpath = os.path.join(targetpath, arcname)
   1529         targetpath = os.path.normpath(targetpath)
   1530 
   1531         # Create all upper directories if necessary.
   1532         upperdirs = os.path.dirname(targetpath)
   1533         if upperdirs and not os.path.exists(upperdirs):
   1534             os.makedirs(upperdirs)
   1535 
   1536         if member.is_dir():
   1537             if not os.path.isdir(targetpath):
   1538                 os.mkdir(targetpath)
   1539             return targetpath
   1540 
   1541         with self.open(member, pwd=pwd) as source, \
   1542              open(targetpath, "wb") as target:
   1543             shutil.copyfileobj(source, target)
   1544 
   1545         return targetpath
   1546 
   1547     def _writecheck(self, zinfo):
   1548         """Check for errors before writing a file to the archive."""
   1549         if zinfo.filename in self.NameToInfo:
   1550             import warnings
   1551             warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
   1552         if self.mode not in ('w', 'x', 'a'):
   1553             raise ValueError("write() requires mode 'w', 'x', or 'a'")
   1554         if not self.fp:
   1555             raise ValueError(
   1556                 "Attempt to write ZIP archive that was already closed")
   1557         _check_compression(zinfo.compress_type)
   1558         if not self._allowZip64:
   1559             requires_zip64 = None
   1560             if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
   1561                 requires_zip64 = "Files count"
   1562             elif zinfo.file_size > ZIP64_LIMIT:
   1563                 requires_zip64 = "Filesize"
   1564             elif zinfo.header_offset > ZIP64_LIMIT:
   1565                 requires_zip64 = "Zipfile size"
   1566             if requires_zip64:
   1567                 raise LargeZipFile(requires_zip64 +
   1568                                    " would require ZIP64 extensions")
   1569 
   1570     def write(self, filename, arcname=None, compress_type=None):
   1571         """Put the bytes from filename into the archive under the name
   1572         arcname."""
   1573         if not self.fp:
   1574             raise ValueError(
   1575                 "Attempt to write to ZIP archive that was already closed")
   1576         if self._writing:
   1577             raise ValueError(
   1578                 "Can't write to ZIP archive while an open writing handle exists"
   1579             )
   1580 
   1581         zinfo = ZipInfo.from_file(filename, arcname)
   1582 
   1583         if zinfo.is_dir():
   1584             zinfo.compress_size = 0
   1585             zinfo.CRC = 0
   1586         else:
   1587             if compress_type is not None:
   1588                 zinfo.compress_type = compress_type
   1589             else:
   1590                 zinfo.compress_type = self.compression
   1591 
   1592         if zinfo.is_dir():
   1593             with self._lock:
   1594                 if self._seekable:
   1595                     self.fp.seek(self.start_dir)
   1596                 zinfo.header_offset = self.fp.tell()  # Start of header bytes
   1597                 if zinfo.compress_type == ZIP_LZMA:
   1598                 # Compressed data includes an end-of-stream (EOS) marker
   1599                     zinfo.flag_bits |= 0x02
   1600 
   1601                 self._writecheck(zinfo)
   1602                 self._didModify = True
   1603 
   1604                 self.filelist.append(zinfo)
   1605                 self.NameToInfo[zinfo.filename] = zinfo
   1606                 self.fp.write(zinfo.FileHeader(False))
   1607                 self.start_dir = self.fp.tell()
   1608         else:
   1609             with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
   1610                 shutil.copyfileobj(src, dest, 1024*8)
   1611 
   1612     def writestr(self, zinfo_or_arcname, data, compress_type=None):
   1613         """Write a file into the archive.  The contents is 'data', which
   1614         may be either a 'str' or a 'bytes' instance; if it is a 'str',
   1615         it is encoded as UTF-8 first.
   1616         'zinfo_or_arcname' is either a ZipInfo instance or
   1617         the name of the file in the archive."""
   1618         if isinstance(data, str):
   1619             data = data.encode("utf-8")
   1620         if not isinstance(zinfo_or_arcname, ZipInfo):
   1621             zinfo = ZipInfo(filename=zinfo_or_arcname,
   1622                             date_time=time.localtime(time.time())[:6])
   1623             zinfo.compress_type = self.compression
   1624             if zinfo.filename[-1] == '/':
   1625                 zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
   1626                 zinfo.external_attr |= 0x10           # MS-DOS directory flag
   1627             else:
   1628                 zinfo.external_attr = 0o600 << 16     # ?rw-------
   1629         else:
   1630             zinfo = zinfo_or_arcname
   1631 
   1632         if not self.fp:
   1633             raise ValueError(
   1634                 "Attempt to write to ZIP archive that was already closed")
   1635         if self._writing:
   1636             raise ValueError(
   1637                 "Can't write to ZIP archive while an open writing handle exists."
   1638             )
   1639 
   1640         if compress_type is not None:
   1641             zinfo.compress_type = compress_type
   1642 
   1643         zinfo.file_size = len(data)            # Uncompressed size
   1644         with self._lock:
   1645             with self.open(zinfo, mode='w') as dest:
   1646                 dest.write(data)
   1647 
   1648     def __del__(self):
   1649         """Call the "close()" method in case the user forgot."""
   1650         self.close()
   1651 
   1652     def close(self):
   1653         """Close the file, and for mode 'w', 'x' and 'a' write the ending
   1654         records."""
   1655         if self.fp is None:
   1656             return
   1657 
   1658         if self._writing:
   1659             raise ValueError("Can't close the ZIP file while there is "
   1660                              "an open writing handle on it. "
   1661                              "Close the writing handle before closing the zip.")
   1662 
   1663         try:
   1664             if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
   1665                 with self._lock:
   1666                     if self._seekable:
   1667                         self.fp.seek(self.start_dir)
   1668                     self._write_end_record()
   1669         finally:
   1670             fp = self.fp
   1671             self.fp = None
   1672             self._fpclose(fp)
   1673 
   1674     def _write_end_record(self):
   1675         for zinfo in self.filelist:         # write central directory
   1676             dt = zinfo.date_time
   1677             dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
   1678             dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
   1679             extra = []
   1680             if zinfo.file_size > ZIP64_LIMIT \
   1681                or zinfo.compress_size > ZIP64_LIMIT:
   1682                 extra.append(zinfo.file_size)
   1683                 extra.append(zinfo.compress_size)
   1684                 file_size = 0xffffffff
   1685                 compress_size = 0xffffffff
   1686             else:
   1687                 file_size = zinfo.file_size
   1688                 compress_size = zinfo.compress_size
   1689 
   1690             header_offset = zinfo.header_offset - self._start_disk
   1691             if header_offset > ZIP64_LIMIT:
   1692                 extra.append(header_offset)
   1693                 header_offset = 0xffffffff
   1694 
   1695             extra_data = zinfo.extra
   1696             min_version = 0
   1697             if extra:
   1698                 # Append a ZIP64 field to the extra's
   1699                 extra_data = struct.pack(
   1700                     '<HH' + 'Q'*len(extra),
   1701                     1, 8*len(extra), *extra) + extra_data
   1702 
   1703                 min_version = ZIP64_VERSION
   1704 
   1705             if zinfo.compress_type == ZIP_BZIP2:
   1706                 min_version = max(BZIP2_VERSION, min_version)
   1707             elif zinfo.compress_type == ZIP_LZMA:
   1708                 min_version = max(LZMA_VERSION, min_version)
   1709 
   1710             extract_version = max(min_version, zinfo.extract_version)
   1711             create_version = max(min_version, zinfo.create_version)
   1712             try:
   1713                 filename, flag_bits = zinfo._encodeFilenameFlags()
   1714                 centdir = struct.pack(structCentralDir,
   1715                                       stringCentralDir, create_version,
   1716                                       zinfo.create_system, extract_version, zinfo.reserved,
   1717                                       flag_bits, zinfo.compress_type, dostime, dosdate,
   1718                                       zinfo.CRC, compress_size, file_size,
   1719                                       len(filename), len(extra_data), len(zinfo.comment),
   1720                                       0, zinfo.internal_attr, zinfo.external_attr,
   1721                                       header_offset)
   1722             except DeprecationWarning:
   1723                 print((structCentralDir, stringCentralDir, create_version,
   1724                        zinfo.create_system, extract_version, zinfo.reserved,
   1725                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
   1726                        zinfo.CRC, compress_size, file_size,
   1727                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
   1728                        0, zinfo.internal_attr, zinfo.external_attr,
   1729                        header_offset), file=sys.stderr)
   1730                 raise
   1731             self.fp.write(centdir)
   1732             self.fp.write(filename)
   1733             self.fp.write(extra_data)
   1734             self.fp.write(zinfo.comment)
   1735 
   1736         pos2 = self.fp.tell()
   1737         # Write end-of-zip-archive record
   1738         centDirCount = len(self.filelist)
   1739         centDirSize = pos2 - self.start_dir
   1740         centDirOffset = self.start_dir - self._start_disk
   1741         requires_zip64 = None
   1742         if centDirCount > ZIP_FILECOUNT_LIMIT:
   1743             requires_zip64 = "Files count"
   1744         elif centDirOffset > ZIP64_LIMIT:
   1745             requires_zip64 = "Central directory offset"
   1746         elif centDirSize > ZIP64_LIMIT:
   1747             requires_zip64 = "Central directory size"
   1748         if requires_zip64:
   1749             # Need to write the ZIP64 end-of-archive records
   1750             if not self._allowZip64:
   1751                 raise LargeZipFile(requires_zip64 +
   1752                                    " would require ZIP64 extensions")
   1753             zip64endrec = struct.pack(
   1754                 structEndArchive64, stringEndArchive64,
   1755                 44, 45, 45, 0, 0, centDirCount, centDirCount,
   1756                 centDirSize, centDirOffset)
   1757             self.fp.write(zip64endrec)
   1758 
   1759             zip64locrec = struct.pack(
   1760                 structEndArchive64Locator,
   1761                 stringEndArchive64Locator, 0, pos2, 1)
   1762             self.fp.write(zip64locrec)
   1763             centDirCount = min(centDirCount, 0xFFFF)
   1764             centDirSize = min(centDirSize, 0xFFFFFFFF)
   1765             centDirOffset = min(centDirOffset, 0xFFFFFFFF)
   1766 
   1767         endrec = struct.pack(structEndArchive, stringEndArchive,
   1768                              0, 0, centDirCount, centDirCount,
   1769                              centDirSize, centDirOffset, len(self._comment))
   1770         self.fp.write(endrec)
   1771         self.fp.write(self._comment)
   1772         self.fp.flush()
   1773 
   1774     def _fpclose(self, fp):
   1775         assert self._fileRefCnt > 0
   1776         self._fileRefCnt -= 1
   1777         if not self._fileRefCnt and not self._filePassed:
   1778             fp.close()
   1779 
   1780 
   1781 class PyZipFile(ZipFile):
   1782     """Class to create ZIP archives with Python library files and packages."""
   1783 
   1784     def __init__(self, file, mode="r", compression=ZIP_STORED,
   1785                  allowZip64=True, optimize=-1):
   1786         ZipFile.__init__(self, file, mode=mode, compression=compression,
   1787                          allowZip64=allowZip64)
   1788         self._optimize = optimize
   1789 
   1790     def writepy(self, pathname, basename="", filterfunc=None):
   1791         """Add all files from "pathname" to the ZIP archive.
   1792 
   1793         If pathname is a package directory, search the directory and
   1794         all package subdirectories recursively for all *.py and enter
   1795         the modules into the archive.  If pathname is a plain
   1796         directory, listdir *.py and enter all modules.  Else, pathname
   1797         must be a Python *.py file and the module will be put into the
   1798         archive.  Added modules are always module.pyc.
   1799         This method will compile the module.py into module.pyc if
   1800         necessary.
   1801         If filterfunc(pathname) is given, it is called with every argument.
   1802         When it is False, the file or directory is skipped.
   1803         """
   1804         if filterfunc and not filterfunc(pathname):
   1805             if self.debug:
   1806                 label = 'path' if os.path.isdir(pathname) else 'file'
   1807                 print('%s %r skipped by filterfunc' % (label, pathname))
   1808             return
   1809         dir, name = os.path.split(pathname)
   1810         if os.path.isdir(pathname):
   1811             initname = os.path.join(pathname, "__init__.py")
   1812             if os.path.isfile(initname):
   1813                 # This is a package directory, add it
   1814                 if basename:
   1815                     basename = "%s/%s" % (basename, name)
   1816                 else:
   1817                     basename = name
   1818                 if self.debug:
   1819                     print("Adding package in", pathname, "as", basename)
   1820                 fname, arcname = self._get_codename(initname[0:-3], basename)
   1821                 if self.debug:
   1822                     print("Adding", arcname)
   1823                 self.write(fname, arcname)
   1824                 dirlist = os.listdir(pathname)
   1825                 dirlist.remove("__init__.py")
   1826                 # Add all *.py files and package subdirectories
   1827                 for filename in dirlist:
   1828                     path = os.path.join(pathname, filename)
   1829                     root, ext = os.path.splitext(filename)
   1830                     if os.path.isdir(path):
   1831                         if os.path.isfile(os.path.join(path, "__init__.py")):
   1832                             # This is a package directory, add it
   1833                             self.writepy(path, basename,
   1834                                          filterfunc=filterfunc)  # Recursive call
   1835                     elif ext == ".py":
   1836                         if filterfunc and not filterfunc(path):
   1837                             if self.debug:
   1838                                 print('file %r skipped by filterfunc' % path)
   1839                             continue
   1840                         fname, arcname = self._get_codename(path[0:-3],
   1841                                                             basename)
   1842                         if self.debug:
   1843                             print("Adding", arcname)
   1844                         self.write(fname, arcname)
   1845             else:
   1846                 # This is NOT a package directory, add its files at top level
   1847                 if self.debug:
   1848                     print("Adding files from directory", pathname)
   1849                 for filename in os.listdir(pathname):
   1850                     path = os.path.join(pathname, filename)
   1851                     root, ext = os.path.splitext(filename)
   1852                     if ext == ".py":
   1853                         if filterfunc and not filterfunc(path):
   1854                             if self.debug:
   1855                                 print('file %r skipped by filterfunc' % path)
   1856                             continue
   1857                         fname, arcname = self._get_codename(path[0:-3],
   1858                                                             basename)
   1859                         if self.debug:
   1860                             print("Adding", arcname)
   1861                         self.write(fname, arcname)
   1862         else:
   1863             if pathname[-3:] != ".py":
   1864                 raise RuntimeError(
   1865                     'Files added with writepy() must end with ".py"')
   1866             fname, arcname = self._get_codename(pathname[0:-3], basename)
   1867             if self.debug:
   1868                 print("Adding file", arcname)
   1869             self.write(fname, arcname)
   1870 
   1871     def _get_codename(self, pathname, basename):
   1872         """Return (filename, archivename) for the path.
   1873 
   1874         Given a module name path, return the correct file path and
   1875         archive name, compiling if necessary.  For example, given
   1876         /python/lib/string, return (/python/lib/string.pyc, string).
   1877         """
   1878         def _compile(file, optimize=-1):
   1879             import py_compile
   1880             if self.debug:
   1881                 print("Compiling", file)
   1882             try:
   1883                 py_compile.compile(file, doraise=True, optimize=optimize)
   1884             except py_compile.PyCompileError as err:
   1885                 print(err.msg)
   1886                 return False
   1887             return True
   1888 
   1889         file_py  = pathname + ".py"
   1890         file_pyc = pathname + ".pyc"
   1891         pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
   1892         pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
   1893         pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
   1894         if self._optimize == -1:
   1895             # legacy mode: use whatever file is present
   1896             if (os.path.isfile(file_pyc) and
   1897                   os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
   1898                 # Use .pyc file.
   1899                 arcname = fname = file_pyc
   1900             elif (os.path.isfile(pycache_opt0) and
   1901                   os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
   1902                 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
   1903                 # file name in the archive.
   1904                 fname = pycache_opt0
   1905                 arcname = file_pyc
   1906             elif (os.path.isfile(pycache_opt1) and
   1907                   os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
   1908                 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
   1909                 # file name in the archive.
   1910                 fname = pycache_opt1
   1911                 arcname = file_pyc
   1912             elif (os.path.isfile(pycache_opt2) and
   1913                   os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
   1914                 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
   1915                 # file name in the archive.
   1916                 fname = pycache_opt2
   1917                 arcname = file_pyc
   1918             else:
   1919                 # Compile py into PEP 3147 pyc file.
   1920                 if _compile(file_py):
   1921                     if sys.flags.optimize == 0:
   1922                         fname = pycache_opt0
   1923                     elif sys.flags.optimize == 1:
   1924                         fname = pycache_opt1
   1925                     else:
   1926                         fname = pycache_opt2
   1927                     arcname = file_pyc
   1928                 else:
   1929                     fname = arcname = file_py
   1930         else:
   1931             # new mode: use given optimization level
   1932             if self._optimize == 0:
   1933                 fname = pycache_opt0
   1934                 arcname = file_pyc
   1935             else:
   1936                 arcname = file_pyc
   1937                 if self._optimize == 1:
   1938                     fname = pycache_opt1
   1939                 elif self._optimize == 2:
   1940                     fname = pycache_opt2
   1941                 else:
   1942                     msg = "invalid value for 'optimize': {!r}".format(self._optimize)
   1943                     raise ValueError(msg)
   1944             if not (os.path.isfile(fname) and
   1945                     os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
   1946                 if not _compile(file_py, optimize=self._optimize):
   1947                     fname = arcname = file_py
   1948         archivename = os.path.split(arcname)[1]
   1949         if basename:
   1950             archivename = "%s/%s" % (basename, archivename)
   1951         return (fname, archivename)
   1952 
   1953 
   1954 def main(args = None):
   1955     import textwrap
   1956     USAGE=textwrap.dedent("""\
   1957         Usage:
   1958             zipfile.py -l zipfile.zip        # Show listing of a zipfile
   1959             zipfile.py -t zipfile.zip        # Test if a zipfile is valid
   1960             zipfile.py -e zipfile.zip target # Extract zipfile into target dir
   1961             zipfile.py -c zipfile.zip src ... # Create zipfile from sources
   1962         """)
   1963     if args is None:
   1964         args = sys.argv[1:]
   1965 
   1966     if not args or args[0] not in ('-l', '-c', '-e', '-t'):
   1967         print(USAGE)
   1968         sys.exit(1)
   1969 
   1970     if args[0] == '-l':
   1971         if len(args) != 2:
   1972             print(USAGE)
   1973             sys.exit(1)
   1974         with ZipFile(args[1], 'r') as zf:
   1975             zf.printdir()
   1976 
   1977     elif args[0] == '-t':
   1978         if len(args) != 2:
   1979             print(USAGE)
   1980             sys.exit(1)
   1981         with ZipFile(args[1], 'r') as zf:
   1982             badfile = zf.testzip()
   1983         if badfile:
   1984             print("The following enclosed file is corrupted: {!r}".format(badfile))
   1985         print("Done testing")
   1986 
   1987     elif args[0] == '-e':
   1988         if len(args) != 3:
   1989             print(USAGE)
   1990             sys.exit(1)
   1991 
   1992         with ZipFile(args[1], 'r') as zf:
   1993             zf.extractall(args[2])
   1994 
   1995     elif args[0] == '-c':
   1996         if len(args) < 3:
   1997             print(USAGE)
   1998             sys.exit(1)
   1999 
   2000         def addToZip(zf, path, zippath):
   2001             if os.path.isfile(path):
   2002                 zf.write(path, zippath, ZIP_DEFLATED)
   2003             elif os.path.isdir(path):
   2004                 if zippath:
   2005                     zf.write(path, zippath)
   2006                 for nm in os.listdir(path):
   2007                     addToZip(zf,
   2008                              os.path.join(path, nm), os.path.join(zippath, nm))
   2009             # else: ignore
   2010 
   2011         with ZipFile(args[1], 'w') as zf:
   2012             for path in args[2:]:
   2013                 zippath = os.path.basename(path)
   2014                 if not zippath:
   2015                     zippath = os.path.basename(os.path.dirname(path))
   2016                 if zippath in ('', os.curdir, os.pardir):
   2017                     zippath = ''
   2018                 addToZip(zf, path, zippath)
   2019 
   2020 if __name__ == "__main__":
   2021     main()
   2022