      1 #!/usr/bin/env python3
      2 #-------------------------------------------------------------------
      3 # tarfile.py
      4 #-------------------------------------------------------------------
      5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
      6 # All rights reserved.
      7 #
      8 # Permission  is  hereby granted,  free  of charge,  to  any person
      9 # obtaining a  copy of  this software  and associated documentation
     10 # files  (the  "Software"),  to   deal  in  the  Software   without
     11 # restriction,  including  without limitation  the  rights to  use,
     12 # copy, modify, merge, publish, distribute, sublicense, and/or sell
     13 # copies  of  the  Software,  and to  permit  persons  to  whom the
     14 # Software  is  furnished  to  do  so,  subject  to  the  following
     15 # conditions:
     16 #
     17 # The above copyright  notice and this  permission notice shall  be
     18 # included in all copies or substantial portions of the Software.
     19 #
     20 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
     21 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
     22 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
     23 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
     24 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
     25 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
     26 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     27 # OTHER DEALINGS IN THE SOFTWARE.
     28 #
     29 """Read from and write to tar format archives.
     30 """
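# A minimal usage sketch (illustrative only; the file names are placeholders):
#
#     import tarfile
#     with tarfile.open("sample.tar.gz", "w:gz") as tar:
#         tar.add("somefile.txt")
#     with tarfile.open("sample.tar.gz", "r:gz") as tar:
#         tar.extractall(path="extracted")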
     31 
     32 version     = "0.9.0"
     33 __author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
     34 __date__    = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
     35 __cvsid__   = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
     36 __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
     37 
     38 #---------
     39 # Imports
     40 #---------
     41 from builtins import open as bltn_open
     42 import sys
     43 import os
     44 import io
     45 import shutil
     46 import stat
     47 import time
     48 import struct
     49 import copy
     50 import re
     51 
     52 try:
     53     import pwd
     54 except ImportError:
     55     pwd = None
     56 try:
     57     import grp
     58 except ImportError:
     59     grp = None
     60 
     61 # os.symlink on Windows prior to 6.0 raises NotImplementedError
     62 symlink_exception = (AttributeError, NotImplementedError)
     63 try:
     64     # OSError (winerror=1314) will be raised if the caller does not hold the
     65     # SeCreateSymbolicLinkPrivilege privilege
     66     symlink_exception += (OSError,)
     67 except NameError:
     68     pass
     69 
     70 # from tarfile import *
     71 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
     72            "CompressionError", "StreamError", "ExtractError", "HeaderError",
     73            "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
     74            "DEFAULT_FORMAT", "open"]
     75 
     76 #---------------------------------------------------------
     77 # tar constants
     78 #---------------------------------------------------------
     79 NUL = b"\0"                     # the null character
     80 BLOCKSIZE = 512                 # length of processing blocks
     81 RECORDSIZE = BLOCKSIZE * 20     # length of records
     82 GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
     83 POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
     84 
     85 LENGTH_NAME = 100               # maximum length of a filename
     86 LENGTH_LINK = 100               # maximum length of a linkname
     87 LENGTH_PREFIX = 155             # maximum length of the prefix field
     88 
     89 REGTYPE = b"0"                  # regular file
     90 AREGTYPE = b"\0"                # regular file
     91 LNKTYPE = b"1"                  # link (inside tarfile)
     92 SYMTYPE = b"2"                  # symbolic link
     93 CHRTYPE = b"3"                  # character special device
     94 BLKTYPE = b"4"                  # block special device
     95 DIRTYPE = b"5"                  # directory
     96 FIFOTYPE = b"6"                 # fifo special device
     97 CONTTYPE = b"7"                 # contiguous file
     98 
     99 GNUTYPE_LONGNAME = b"L"         # GNU tar longname
    100 GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
    101 GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
    102 
    103 XHDTYPE = b"x"                  # POSIX.1-2001 extended header
    104 XGLTYPE = b"g"                  # POSIX.1-2001 global header
    105 SOLARIS_XHDTYPE = b"X"          # Solaris extended header
    106 
    107 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
    108 GNU_FORMAT = 1                  # GNU tar format
    109 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
    110 DEFAULT_FORMAT = GNU_FORMAT
    111 
    112 #---------------------------------------------------------
    113 # tarfile constants
    114 #---------------------------------------------------------
    115 # File types that tarfile supports:
    116 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
    117                    SYMTYPE, DIRTYPE, FIFOTYPE,
    118                    CONTTYPE, CHRTYPE, BLKTYPE,
    119                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    120                    GNUTYPE_SPARSE)
    121 
    122 # File types that will be treated as a regular file.
    123 REGULAR_TYPES = (REGTYPE, AREGTYPE,
    124                  CONTTYPE, GNUTYPE_SPARSE)
    125 
    126 # File types that are part of the GNU tar format.
    127 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    128              GNUTYPE_SPARSE)
    129 
    130 # Fields from a pax header that override a TarInfo attribute.
    131 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
    132               "uid", "gid", "uname", "gname")
    133 
    134 # Fields from a pax header that are affected by hdrcharset.
    135 PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
    136 
    137 # Fields in a pax header that are numbers, all other fields
    138 # are treated as strings.
    139 PAX_NUMBER_FIELDS = {
    140     "atime": float,
    141     "ctime": float,
    142     "mtime": float,
    143     "uid": int,
    144     "gid": int,
    145     "size": int
    146 }
    147 
    148 #---------------------------------------------------------
    149 # initialization
    150 #---------------------------------------------------------
    151 if os.name == "nt":
    152     ENCODING = "utf-8"
    153 else:
    154     ENCODING = sys.getfilesystemencoding()
    155 
    156 #---------------------------------------------------------
    157 # Some useful functions
    158 #---------------------------------------------------------
    159 
    160 def stn(s, length, encoding, errors):
    161     """Convert a string to a null-terminated bytes object.
    162     """
    163     s = s.encode(encoding, errors)
    164     return s[:length] + (length - len(s)) * NUL
    165 
    166 def nts(s, encoding, errors):
    167     """Convert a null-terminated bytes object to a string.
    168     """
    169     p = s.find(b"\0")
    170     if p != -1:
    171         s = s[:p]
    172     return s.decode(encoding, errors)
    173 
    174 def nti(s):
    175     """Convert a number field to a python number.
    176     """
    177     # There are two possible encodings for a number field, see
    178     # itn() below.
    179     if s[0] in (0o200, 0o377):
    180         n = 0
    181         for i in range(len(s) - 1):
    182             n <<= 8
    183             n += s[i + 1]
    184         if s[0] == 0o377:
    185             n = -(256 ** (len(s) - 1) - n)
    186     else:
    187         try:
    188             s = nts(s, "ascii", "strict")
    189             n = int(s.strip() or "0", 8)
    190         except ValueError:
    191             raise InvalidHeaderError("invalid header")
    192     return n
    193 
    194 def itn(n, digits=8, format=DEFAULT_FORMAT):
    195     """Convert a python number to a number field.
    196     """
    197     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    198     # octal digits followed by a null-byte; this allows values up to
    199     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    200     # that if necessary. A leading 0o200 or 0o377 byte indicates this
    201     # particular encoding; the following digits-1 bytes are a big-endian
    202     # base-256 representation. This allows values up to (256**(digits-1))-1.
    203     # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    204     # number.
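    # Illustrative examples: itn(1000, 8) yields b"0001750\x00" (1000 == 0o1750),
    # and nti(b"0001750\x00") recovers 1000.  A value that does not fit into the
    # octal field, e.g. itn(8 ** 11, 12, GNU_FORMAT), takes the base-256 branch
    # below and starts with a 0o200 byte.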
    205     if 0 <= n < 8 ** (digits - 1):
    206         s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
    207     elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
    208         if n >= 0:
    209             s = bytearray([0o200])
    210         else:
    211             s = bytearray([0o377])
    212             n = 256 ** digits + n
    213 
    214         for i in range(digits - 1):
    215             s.insert(1, n & 0o377)
    216             n >>= 8
    217     else:
    218         raise ValueError("overflow in number field")
    219 
    220     return s
    221 
    222 def calc_chksums(buf):
    223     """Calculate the checksum for a member's header by summing up all
    224        characters except for the chksum field which is treated as if
    225        it was filled with spaces. According to the GNU tar sources,
    226        some tars (Sun and NeXT) calculate chksum with signed char,
    227        which will be different if there are chars in the buffer with
    228        the high bit set. So we calculate two checksums, unsigned and
    229        signed.
    230     """
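    # The format "148B8x356B" reads the 148 bytes before the chksum field,
    # skips the 8-byte chksum field itself and reads the 356 bytes after it
    # (148 + 8 + 356 == 512).  Adding 256 accounts for the skipped field
    # being treated as eight spaces (8 * 0x20 == 256).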
    231     unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    232     signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    233     return unsigned_chksum, signed_chksum
    234 
    235 def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    236     """Copy length bytes from fileobj src to fileobj dst.
    237        If length is None, copy the entire content.
    238     """
    239     bufsize = bufsize or 16 * 1024
    240     if length == 0:
    241         return
    242     if length is None:
    243         shutil.copyfileobj(src, dst, bufsize)
    244         return
    245 
    246     blocks, remainder = divmod(length, bufsize)
    247     for b in range(blocks):
    248         buf = src.read(bufsize)
    249         if len(buf) < bufsize:
    250             raise exception("unexpected end of data")
    251         dst.write(buf)
    252 
    253     if remainder != 0:
    254         buf = src.read(remainder)
    255         if len(buf) < remainder:
    256             raise exception("unexpected end of data")
    257         dst.write(buf)
    258     return
    259 
    260 def filemode(mode):
    261     """Deprecated in this location; use stat.filemode."""
    262     import warnings
    263     warnings.warn("deprecated in favor of stat.filemode",
    264                   DeprecationWarning, 2)
    265     return stat.filemode(mode)
    266 
    267 def _safe_print(s):
    268     encoding = getattr(sys.stdout, 'encoding', None)
    269     if encoding is not None:
    270         s = s.encode(encoding, 'backslashreplace').decode(encoding)
    271     print(s, end=' ')
    272 
    273 
    274 class TarError(Exception):
    275     """Base exception."""
    276     pass
    277 class ExtractError(TarError):
    278     """General exception for extract errors."""
    279     pass
    280 class ReadError(TarError):
    281     """Exception for unreadable tar archives."""
    282     pass
    283 class CompressionError(TarError):
    284     """Exception for unavailable compression methods."""
    285     pass
    286 class StreamError(TarError):
    287     """Exception for unsupported operations on stream-like TarFiles."""
    288     pass
    289 class HeaderError(TarError):
    290     """Base exception for header errors."""
    291     pass
    292 class EmptyHeaderError(HeaderError):
    293     """Exception for empty headers."""
    294     pass
    295 class TruncatedHeaderError(HeaderError):
    296     """Exception for truncated headers."""
    297     pass
    298 class EOFHeaderError(HeaderError):
    299     """Exception for end of file headers."""
    300     pass
    301 class InvalidHeaderError(HeaderError):
    302     """Exception for invalid headers."""
    303     pass
    304 class SubsequentHeaderError(HeaderError):
    305     """Exception for missing and invalid extended headers."""
    306     pass
    307 
    308 #---------------------------
    309 # internal stream interface
    310 #---------------------------
    311 class _LowLevelFile:
    312     """Low-level file object. Supports reading and writing.
    313        It is used instead of a regular file object for streaming
    314        access.
    315     """
    316 
    317     def __init__(self, name, mode):
    318         mode = {
    319             "r": os.O_RDONLY,
    320             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
    321         }[mode]
    322         if hasattr(os, "O_BINARY"):
    323             mode |= os.O_BINARY
    324         self.fd = os.open(name, mode, 0o666)
    325 
    326     def close(self):
    327         os.close(self.fd)
    328 
    329     def read(self, size):
    330         return os.read(self.fd, size)
    331 
    332     def write(self, s):
    333         os.write(self.fd, s)
    334 
    335 class _Stream:
    336     """Class that serves as an adapter between TarFile and
    337        a stream-like object.  The stream-like object only
    338        needs to have a read() or write() method and is accessed
    339        blockwise.  Use of gzip or bzip2 compression is possible.
    340        A stream-like object could be for example: sys.stdin,
    341        sys.stdout, a socket, a tape device etc.
    342 
    343        _Stream is intended to be used only internally.
    344     """
    345 
    346     def __init__(self, name, mode, comptype, fileobj, bufsize):
    347         """Construct a _Stream object.
    348         """
    349         self._extfileobj = True
    350         if fileobj is None:
    351             fileobj = _LowLevelFile(name, mode)
    352             self._extfileobj = False
    353 
    354         if comptype == '*':
    355             # Enable transparent compression detection for the
    356             # stream interface
    357             fileobj = _StreamProxy(fileobj)
    358             comptype = fileobj.getcomptype()
    359 
    360         self.name     = name or ""
    361         self.mode     = mode
    362         self.comptype = comptype
    363         self.fileobj  = fileobj
    364         self.bufsize  = bufsize
    365         self.buf      = b""
    366         self.pos      = 0
    367         self.closed   = False
    368 
    369         try:
    370             if comptype == "gz":
    371                 try:
    372                     import zlib
    373                 except ImportError:
    374                     raise CompressionError("zlib module is not available")
    375                 self.zlib = zlib
    376                 self.crc = zlib.crc32(b"")
    377                 if mode == "r":
    378                     self._init_read_gz()
    379                     self.exception = zlib.error
    380                 else:
    381                     self._init_write_gz()
    382 
    383             elif comptype == "bz2":
    384                 try:
    385                     import bz2
    386                 except ImportError:
    387                     raise CompressionError("bz2 module is not available")
    388                 if mode == "r":
    389                     self.dbuf = b""
    390                     self.cmp = bz2.BZ2Decompressor()
    391                     self.exception = OSError
    392                 else:
    393                     self.cmp = bz2.BZ2Compressor()
    394 
    395             elif comptype == "xz":
    396                 try:
    397                     import lzma
    398                 except ImportError:
    399                     raise CompressionError("lzma module is not available")
    400                 if mode == "r":
    401                     self.dbuf = b""
    402                     self.cmp = lzma.LZMADecompressor()
    403                     self.exception = lzma.LZMAError
    404                 else:
    405                     self.cmp = lzma.LZMACompressor()
    406 
    407             elif comptype != "tar":
    408                 raise CompressionError("unknown compression type %r" % comptype)
    409 
    410         except:
    411             if not self._extfileobj:
    412                 self.fileobj.close()
    413             self.closed = True
    414             raise
    415 
    416     def __del__(self):
    417         if hasattr(self, "closed") and not self.closed:
    418             self.close()
    419 
    420     def _init_write_gz(self):
    421         """Initialize for writing with gzip compression.
    422         """
    423         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
    424                                             -self.zlib.MAX_WBITS,
    425                                             self.zlib.DEF_MEM_LEVEL,
    426                                             0)
    427         timestamp = struct.pack("<L", int(time.time()))
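        # Gzip member header (RFC 1952): magic \037\213, method \010 (deflate),
        # flags \010 (FNAME set), 4-byte mtime, XFL \002 (maximum compression),
        # OS \377 (unknown); the NUL-terminated file name follows further below.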
    428         self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
    429         if self.name.endswith(".gz"):
    430             self.name = self.name[:-3]
    431         # RFC1952 says we must use ISO-8859-1 for the FNAME field.
    432         self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
    433 
    434     def write(self, s):
    435         """Write string s to the stream.
    436         """
    437         if self.comptype == "gz":
    438             self.crc = self.zlib.crc32(s, self.crc)
    439         self.pos += len(s)
    440         if self.comptype != "tar":
    441             s = self.cmp.compress(s)
    442         self.__write(s)
    443 
    444     def __write(self, s):
    445         """Write string s to the stream if a whole new block
    446            is ready to be written.
    447         """
    448         self.buf += s
    449         while len(self.buf) > self.bufsize:
    450             self.fileobj.write(self.buf[:self.bufsize])
    451             self.buf = self.buf[self.bufsize:]
    452 
    453     def close(self):
    454         """Close the _Stream object. No operation should be
    455            done on it afterwards.
    456         """
    457         if self.closed:
    458             return
    459 
    460         self.closed = True
    461         try:
    462             if self.mode == "w" and self.comptype != "tar":
    463                 self.buf += self.cmp.flush()
    464 
    465             if self.mode == "w" and self.buf:
    466                 self.fileobj.write(self.buf)
    467                 self.buf = b""
    468                 if self.comptype == "gz":
    469                     self.fileobj.write(struct.pack("<L", self.crc))
    470                     self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
    471         finally:
    472             if not self._extfileobj:
    473                 self.fileobj.close()
    474 
    475     def _init_read_gz(self):
    476         """Initialize for reading a gzip compressed fileobj.
    477         """
    478         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
    479         self.dbuf = b""
    480 
    481         # taken from gzip.GzipFile with some alterations
    482         if self.__read(2) != b"\037\213":
    483             raise ReadError("not a gzip file")
    484         if self.__read(1) != b"\010":
    485             raise CompressionError("unsupported compression method")
    486 
    487         flag = ord(self.__read(1))
    488         self.__read(6)
    489 
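        # FLG bits (RFC 1952): 4 = FEXTRA, 8 = FNAME, 16 = FCOMMENT, 2 = FHCRC;
        # the corresponding optional fields are skipped below.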
    490         if flag & 4:
    491             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
    492             self.read(xlen)
    493         if flag & 8:
    494             while True:
    495                 s = self.__read(1)
    496                 if not s or s == NUL:
    497                     break
    498         if flag & 16:
    499             while True:
    500                 s = self.__read(1)
    501                 if not s or s == NUL:
    502                     break
    503         if flag & 2:
    504             self.__read(2)
    505 
    506     def tell(self):
    507         """Return the stream's file pointer position.
    508         """
    509         return self.pos
    510 
    511     def seek(self, pos=0):
    512         """Set the stream's file pointer to pos. Negative seeking
    513            is forbidden.
    514         """
    515         if pos - self.pos >= 0:
    516             blocks, remainder = divmod(pos - self.pos, self.bufsize)
    517             for i in range(blocks):
    518                 self.read(self.bufsize)
    519             self.read(remainder)
    520         else:
    521             raise StreamError("seeking backwards is not allowed")
    522         return self.pos
    523 
    524     def read(self, size=None):
    525         """Return the next size number of bytes from the stream.
    526            If size is not defined, return all bytes of the stream
    527            up to EOF.
    528         """
    529         if size is None:
    530             t = []
    531             while True:
    532                 buf = self._read(self.bufsize)
    533                 if not buf:
    534                     break
    535                 t.append(buf)
    536             buf = b"".join(t)
    537         else:
    538             buf = self._read(size)
    539         self.pos += len(buf)
    540         return buf
    541 
    542     def _read(self, size):
    543         """Return size bytes from the stream.
    544         """
    545         if self.comptype == "tar":
    546             return self.__read(size)
    547 
    548         c = len(self.dbuf)
    549         while c < size:
    550             buf = self.__read(self.bufsize)
    551             if not buf:
    552                 break
    553             try:
    554                 buf = self.cmp.decompress(buf)
    555             except self.exception:
    556                 raise ReadError("invalid compressed data")
    557             self.dbuf += buf
    558             c += len(buf)
    559         buf = self.dbuf[:size]
    560         self.dbuf = self.dbuf[size:]
    561         return buf
    562 
    563     def __read(self, size):
    564         """Return size bytes from stream. If internal buffer is empty,
    565            read another block from the stream.
    566         """
    567         c = len(self.buf)
    568         while c < size:
    569             buf = self.fileobj.read(self.bufsize)
    570             if not buf:
    571                 break
    572             self.buf += buf
    573             c += len(buf)
    574         buf = self.buf[:size]
    575         self.buf = self.buf[size:]
    576         return buf
    577 # class _Stream
    578 
    579 class _StreamProxy(object):
    580     """Small proxy class that enables transparent compression
    581        detection for the Stream interface (mode 'r|*').
    582     """
    583 
    584     def __init__(self, fileobj):
    585         self.fileobj = fileobj
    586         self.buf = self.fileobj.read(BLOCKSIZE)
    587 
    588     def read(self, size):
    589         self.read = self.fileobj.read
    590         return self.buf
    591 
    592     def getcomptype(self):
    593         if self.buf.startswith(b"\x1f\x8b\x08"):
    594             return "gz"
    595         elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
    596             return "bz2"
    597         elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
    598             return "xz"
    599         else:
    600             return "tar"
    601 
    602     def close(self):
    603         self.fileobj.close()
    604 # class StreamProxy
    605 
    606 #------------------------
    607 # Extraction file object
    608 #------------------------
    609 class _FileInFile(object):
    610     """A thin wrapper around an existing file object that
    611        provides a part of its data as an individual file
    612        object.
    613     """
    614 
    615     def __init__(self, fileobj, offset, size, blockinfo=None):
    616         self.fileobj = fileobj
    617         self.offset = offset
    618         self.size = size
    619         self.position = 0
    620         self.name = getattr(fileobj, "name", None)
    621         self.closed = False
    622 
    623         if blockinfo is None:
    624             blockinfo = [(0, size)]
    625 
    626         # Construct a map with data and zero blocks.
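        # Each map entry is a tuple (is_data, start, stop, realpos): the member
        # range [start, stop) is either backed by data at realpos in the
        # underlying file object or, if is_data is False, is a hole that reads
        # back as NUL bytes (realpos is None in that case).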
    627         self.map_index = 0
    628         self.map = []
    629         lastpos = 0
    630         realpos = self.offset
    631         for offset, size in blockinfo:
    632             if offset > lastpos:
    633                 self.map.append((False, lastpos, offset, None))
    634             self.map.append((True, offset, offset + size, realpos))
    635             realpos += size
    636             lastpos = offset + size
    637         if lastpos < self.size:
    638             self.map.append((False, lastpos, self.size, None))
    639 
    640     def flush(self):
    641         pass
    642 
    643     def readable(self):
    644         return True
    645 
    646     def writable(self):
    647         return False
    648 
    649     def seekable(self):
    650         return self.fileobj.seekable()
    651 
    652     def tell(self):
    653         """Return the current file position.
    654         """
    655         return self.position
    656 
    657     def seek(self, position, whence=io.SEEK_SET):
    658         """Seek to a position in the file.
    659         """
    660         if whence == io.SEEK_SET:
    661             self.position = min(max(position, 0), self.size)
    662         elif whence == io.SEEK_CUR:
    663             if position < 0:
    664                 self.position = max(self.position + position, 0)
    665             else:
    666                 self.position = min(self.position + position, self.size)
    667         elif whence == io.SEEK_END:
    668             self.position = max(min(self.size + position, self.size), 0)
    669         else:
    670             raise ValueError("Invalid argument")
    671         return self.position
    672 
    673     def read(self, size=None):
    674         """Read data from the file.
    675         """
    676         if size is None:
    677             size = self.size - self.position
    678         else:
    679             size = min(size, self.size - self.position)
    680 
    681         buf = b""
    682         while size > 0:
    683             while True:
    684                 data, start, stop, offset = self.map[self.map_index]
    685                 if start <= self.position < stop:
    686                     break
    687                 else:
    688                     self.map_index += 1
    689                     if self.map_index == len(self.map):
    690                         self.map_index = 0
    691             length = min(size, stop - self.position)
    692             if data:
    693                 self.fileobj.seek(offset + (self.position - start))
    694                 b = self.fileobj.read(length)
    695                 if len(b) != length:
    696                     raise ReadError("unexpected end of data")
    697                 buf += b
    698             else:
    699                 buf += NUL * length
    700             size -= length
    701             self.position += length
    702         return buf
    703 
    704     def readinto(self, b):
    705         buf = self.read(len(b))
    706         b[:len(buf)] = buf
    707         return len(buf)
    708 
    709     def close(self):
    710         self.closed = True
    711 #class _FileInFile
    712 
    713 class ExFileObject(io.BufferedReader):
    714 
    715     def __init__(self, tarfile, tarinfo):
    716         fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
    717                 tarinfo.size, tarinfo.sparse)
    718         super().__init__(fileobj)
    719 #class ExFileObject
    720 
    721 #------------------
    722 # Exported Classes
    723 #------------------
    724 class TarInfo(object):
    725     """Informational class which holds the details about an
    726        archive member given by a tar header block.
    727        TarInfo objects are returned by TarFile.getmember(),
    728        TarFile.getmembers() and TarFile.gettarinfo() and are
    729        usually created internally.
    730     """
    731 
    732     __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
    733                  "chksum", "type", "linkname", "uname", "gname",
    734                  "devmajor", "devminor",
    735                  "offset", "offset_data", "pax_headers", "sparse",
    736                  "tarfile", "_sparse_structs", "_link_target")
    737 
    738     def __init__(self, name=""):
    739         """Construct a TarInfo object. name is the optional name
    740            of the member.
    741         """
    742         self.name = name        # member name
    743         self.mode = 0o644       # file permissions
    744         self.uid = 0            # user id
    745         self.gid = 0            # group id
    746         self.size = 0           # file size
    747         self.mtime = 0          # modification time
    748         self.chksum = 0         # header checksum
    749         self.type = REGTYPE     # member type
    750         self.linkname = ""      # link name
    751         self.uname = ""         # user name
    752         self.gname = ""         # group name
    753         self.devmajor = 0       # device major number
    754         self.devminor = 0       # device minor number
    755 
    756         self.offset = 0         # the tar header starts here
    757         self.offset_data = 0    # the file's data starts here
    758 
    759         self.sparse = None      # sparse member information
    760         self.pax_headers = {}   # pax header information
    761 
    762     # In pax headers the "name" and "linkname" field are called
    763     # "path" and "linkpath".
    764     def _getpath(self):
    765         return self.name
    766     def _setpath(self, name):
    767         self.name = name
    768     path = property(_getpath, _setpath)
    769 
    770     def _getlinkpath(self):
    771         return self.linkname
    772     def _setlinkpath(self, linkname):
    773         self.linkname = linkname
    774     linkpath = property(_getlinkpath, _setlinkpath)
    775 
    776     def __repr__(self):
    777         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
    778 
    779     def get_info(self):
    780         """Return the TarInfo's attributes as a dictionary.
    781         """
    782         info = {
    783             "name":     self.name,
    784             "mode":     self.mode & 0o7777,
    785             "uid":      self.uid,
    786             "gid":      self.gid,
    787             "size":     self.size,
    788             "mtime":    self.mtime,
    789             "chksum":   self.chksum,
    790             "type":     self.type,
    791             "linkname": self.linkname,
    792             "uname":    self.uname,
    793             "gname":    self.gname,
    794             "devmajor": self.devmajor,
    795             "devminor": self.devminor
    796         }
    797 
    798         if info["type"] == DIRTYPE and not info["name"].endswith("/"):
    799             info["name"] += "/"
    800 
    801         return info
    802 
    803     def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    804         """Return a tar header as a string of 512 byte blocks.
    805         """
    806         info = self.get_info()
    807 
    808         if format == USTAR_FORMAT:
    809             return self.create_ustar_header(info, encoding, errors)
    810         elif format == GNU_FORMAT:
    811             return self.create_gnu_header(info, encoding, errors)
    812         elif format == PAX_FORMAT:
    813             return self.create_pax_header(info, encoding)
    814         else:
    815             raise ValueError("invalid format")
    816 
    817     def create_ustar_header(self, info, encoding, errors):
    818         """Return the object as a ustar header block.
    819         """
    820         info["magic"] = POSIX_MAGIC
    821 
    822         if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
    823             raise ValueError("linkname is too long")
    824 
    825         if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
    826             info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
    827 
    828         return self._create_header(info, USTAR_FORMAT, encoding, errors)
    829 
    830     def create_gnu_header(self, info, encoding, errors):
    831         """Return the object as a GNU header block sequence.
    832         """
    833         info["magic"] = GNU_MAGIC
    834 
    835         buf = b""
    836         if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
    837             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
    838 
    839         if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
    840             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
    841 
    842         return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
    843 
    844     def create_pax_header(self, info, encoding):
    845         """Return the object as a ustar header block. If it cannot be
    846            represented this way, prepend a pax extended header sequence
    847            with supplemental information.
    848         """
    849         info["magic"] = POSIX_MAGIC
    850         pax_headers = self.pax_headers.copy()
    851 
    852         # Test string fields for values that exceed the field length or cannot
    853         # be represented in ASCII encoding.
    854         for name, hname, length in (
    855                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
    856                 ("uname", "uname", 32), ("gname", "gname", 32)):
    857 
    858             if hname in pax_headers:
    859                 # The pax header has priority.
    860                 continue
    861 
    862             # Try to encode the string as ASCII.
    863             try:
    864                 info[name].encode("ascii", "strict")
    865             except UnicodeEncodeError:
    866                 pax_headers[hname] = info[name]
    867                 continue
    868 
    869             if len(info[name]) > length:
    870                 pax_headers[hname] = info[name]
    871 
    872         # Test number fields for values that exceed the field limit or values
    873         # that have to be stored as a float.
    874         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
    875             if name in pax_headers:
    876                 # The pax header has priority. Avoid overflow.
    877                 info[name] = 0
    878                 continue
    879 
    880             val = info[name]
    881             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
    882                 pax_headers[name] = str(val)
    883                 info[name] = 0
    884 
    885         # Create a pax extended header if necessary.
    886         if pax_headers:
    887             buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
    888         else:
    889             buf = b""
    890 
    891         return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
    892 
    893     @classmethod
    894     def create_pax_global_header(cls, pax_headers):
    895         """Return the object as a pax global header block sequence.
    896         """
    897         return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    898 
    899     def _posix_split_name(self, name, encoding, errors):
    900         """Split a name longer than 100 chars into a prefix
    901            and a name part.
    902         """
    903         components = name.split("/")
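        # For example (illustrative), a name made of "aaa/bbb/" followed by 100
        # characters is stored as prefix "aaa/bbb" plus a 100-character name;
        # the separating "/" is dropped here and re-inserted by frombuf() when
        # the archive is read back.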
    904         for i in range(1, len(components)):
    905             prefix = "/".join(components[:i])
    906             name = "/".join(components[i:])
    907             if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
    908                     len(name.encode(encoding, errors)) <= LENGTH_NAME:
    909                 break
    910         else:
    911             raise ValueError("name is too long")
    912 
    913         return prefix, name
    914 
    915     @staticmethod
    916     def _create_header(info, format, encoding, errors):
    917         """Return a header block. info is a dictionary with file
    918            information, format must be one of the *_FORMAT constants.
    919         """
    920         parts = [
    921             stn(info.get("name", ""), 100, encoding, errors),
    922             itn(info.get("mode", 0) & 0o7777, 8, format),
    923             itn(info.get("uid", 0), 8, format),
    924             itn(info.get("gid", 0), 8, format),
    925             itn(info.get("size", 0), 12, format),
    926             itn(info.get("mtime", 0), 12, format),
    927             b"        ", # checksum field
    928             info.get("type", REGTYPE),
    929             stn(info.get("linkname", ""), 100, encoding, errors),
    930             info.get("magic", POSIX_MAGIC),
    931             stn(info.get("uname", ""), 32, encoding, errors),
    932             stn(info.get("gname", ""), 32, encoding, errors),
    933             itn(info.get("devmajor", 0), 8, format),
    934             itn(info.get("devminor", 0), 8, format),
    935             stn(info.get("prefix", ""), 155, encoding, errors)
    936         ]
    937 
    938         buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    939         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
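        # The chksum field occupies bytes 148..155 of the 512-byte block
        # (512 - 364 == 148, 512 - 357 == 155), so the slicing below splices
        # the 7-byte "%06o\0" value over the placeholder spaces while keeping
        # the last byte of the original 8-byte field.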
    940         buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    941         return buf
    942 
    943     @staticmethod
    944     def _create_payload(payload):
    945         """Return the string payload filled with zero bytes
    946            up to the next 512 byte border.
    947         """
    948         blocks, remainder = divmod(len(payload), BLOCKSIZE)
    949         if remainder > 0:
    950             payload += (BLOCKSIZE - remainder) * NUL
    951         return payload
    952 
    953     @classmethod
    954     def _create_gnu_long_header(cls, name, type, encoding, errors):
    955         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
    956            for name.
    957         """
    958         name = name.encode(encoding, errors) + NUL
    959 
    960         info = {}
    961         info["name"] = "././@LongLink"
    962         info["type"] = type
    963         info["size"] = len(name)
    964         info["magic"] = GNU_MAGIC
    965 
    966         # create extended header + name blocks.
    967         return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
    968                 cls._create_payload(name)
    969 
    970     @classmethod
    971     def _create_pax_generic_header(cls, pax_headers, type, encoding):
    972         """Return a POSIX.1-2008 extended or global header sequence
    973            that contains a list of keyword, value pairs. The values
    974            must be strings.
    975         """
    976         # Check if one of the fields contains surrogate characters and thereby
    977         # forces hdrcharset=BINARY, see _proc_pax() for more information.
    978         binary = False
    979         for keyword, value in pax_headers.items():
    980             try:
    981                 value.encode("utf-8", "strict")
    982             except UnicodeEncodeError:
    983                 binary = True
    984                 break
    985 
    986         records = b""
    987         if binary:
    988             # Put the hdrcharset field at the beginning of the header.
    989             records += b"21 hdrcharset=BINARY\n"
    990 
    991         for keyword, value in pax_headers.items():
    992             keyword = keyword.encode("utf-8")
    993             if binary:
    994                 # Try to restore the original byte representation of `value'.
    995                 # Needless to say, the encoding must match the string.
    996                 value = value.encode(encoding, "surrogateescape")
    997             else:
    998                 value = value.encode("utf-8")
    999 
   1000             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
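            # The length field counts itself, so iterate until the total length
            # stabilizes.  For example, for keyword "path" and value "foo",
            # l == 10 and p settles at 12, giving the record b"12 path=foo\n".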
   1001             n = p = 0
   1002             while True:
   1003                 n = l + len(str(p))
   1004                 if n == p:
   1005                     break
   1006                 p = n
   1007             records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
   1008 
   1009         # We use a hardcoded "././@PaxHeader" name like star does
   1010         # instead of the one that POSIX recommends.
   1011         info = {}
   1012         info["name"] = "././@PaxHeader"
   1013         info["type"] = type
   1014         info["size"] = len(records)
   1015         info["magic"] = POSIX_MAGIC
   1016 
   1017         # Create pax header + record blocks.
   1018         return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
   1019                 cls._create_payload(records)
   1020 
   1021     @classmethod
   1022     def frombuf(cls, buf, encoding, errors):
   1023         """Construct a TarInfo object from a 512 byte bytes object.
   1024         """
   1025         if len(buf) == 0:
   1026             raise EmptyHeaderError("empty header")
   1027         if len(buf) != BLOCKSIZE:
   1028             raise TruncatedHeaderError("truncated header")
   1029         if buf.count(NUL) == BLOCKSIZE:
   1030             raise EOFHeaderError("end of file header")
   1031 
   1032         chksum = nti(buf[148:156])
   1033         if chksum not in calc_chksums(buf):
   1034             raise InvalidHeaderError("bad checksum")
   1035 
   1036         obj = cls()
   1037         obj.name = nts(buf[0:100], encoding, errors)
   1038         obj.mode = nti(buf[100:108])
   1039         obj.uid = nti(buf[108:116])
   1040         obj.gid = nti(buf[116:124])
   1041         obj.size = nti(buf[124:136])
   1042         obj.mtime = nti(buf[136:148])
   1043         obj.chksum = chksum
   1044         obj.type = buf[156:157]
   1045         obj.linkname = nts(buf[157:257], encoding, errors)
   1046         obj.uname = nts(buf[265:297], encoding, errors)
   1047         obj.gname = nts(buf[297:329], encoding, errors)
   1048         obj.devmajor = nti(buf[329:337])
   1049         obj.devminor = nti(buf[337:345])
   1050         prefix = nts(buf[345:500], encoding, errors)
   1051 
   1052         # Old V7 tar format represents a directory as a regular
   1053         # file with a trailing slash.
   1054         if obj.type == AREGTYPE and obj.name.endswith("/"):
   1055             obj.type = DIRTYPE
   1056 
   1057         # The old GNU sparse format occupies some of the unused
   1058         # space in the buffer for up to 4 sparse structures.
   1059         # Save them for later processing in _proc_sparse().
   1060         if obj.type == GNUTYPE_SPARSE:
   1061             pos = 386
   1062             structs = []
   1063             for i in range(4):
   1064                 try:
   1065                     offset = nti(buf[pos:pos + 12])
   1066                     numbytes = nti(buf[pos + 12:pos + 24])
   1067                 except ValueError:
   1068                     break
   1069                 structs.append((offset, numbytes))
   1070                 pos += 24
   1071             isextended = bool(buf[482])
   1072             origsize = nti(buf[483:495])
   1073             obj._sparse_structs = (structs, isextended, origsize)
   1074 
   1075         # Remove redundant slashes from directories.
   1076         if obj.isdir():
   1077             obj.name = obj.name.rstrip("/")
   1078 
   1079         # Reconstruct a ustar longname.
   1080         if prefix and obj.type not in GNU_TYPES:
   1081             obj.name = prefix + "/" + obj.name
   1082         return obj
   1083 
   1084     @classmethod
   1085     def fromtarfile(cls, tarfile):
   1086         """Return the next TarInfo object from TarFile object
   1087            tarfile.
   1088         """
   1089         buf = tarfile.fileobj.read(BLOCKSIZE)
   1090         obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
   1091         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
   1092         return obj._proc_member(tarfile)
   1093 
   1094     #--------------------------------------------------------------------------
   1095     # The following are methods that are called depending on the type of a
   1096     # member. The entry point is _proc_member() which can be overridden in a
   1097     # subclass to add custom _proc_*() methods. A _proc_*() method MUST
   1098     # implement the following
   1099     # operations:
   1100     # 1. Set self.offset_data to the position where the data blocks begin,
   1101     #    if there is data that follows.
   1102     # 2. Set tarfile.offset to the position where the next member's header will
   1103     #    begin.
   1104     # 3. Return self or another valid TarInfo object.
   1105     def _proc_member(self, tarfile):
   1106         """Choose the right processing method depending on
   1107            the type and call it.
   1108         """
   1109         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
   1110             return self._proc_gnulong(tarfile)
   1111         elif self.type == GNUTYPE_SPARSE:
   1112             return self._proc_sparse(tarfile)
   1113         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
   1114             return self._proc_pax(tarfile)
   1115         else:
   1116             return self._proc_builtin(tarfile)
   1117 
   1118     def _proc_builtin(self, tarfile):
   1119         """Process a builtin type or an unknown type which
   1120            will be treated as a regular file.
   1121         """
   1122         self.offset_data = tarfile.fileobj.tell()
   1123         offset = self.offset_data
   1124         if self.isreg() or self.type not in SUPPORTED_TYPES:
   1125             # Skip the following data blocks.
   1126             offset += self._block(self.size)
   1127         tarfile.offset = offset
   1128 
   1129         # Patch the TarInfo object with saved global
   1130         # header information.
   1131         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
   1132 
   1133         return self
   1134 
   1135     def _proc_gnulong(self, tarfile):
   1136         """Process the blocks that hold a GNU longname
   1137            or longlink member.
   1138         """
   1139         buf = tarfile.fileobj.read(self._block(self.size))
   1140 
   1141         # Fetch the next header and process it.
   1142         try:
   1143             next = self.fromtarfile(tarfile)
   1144         except HeaderError:
   1145             raise SubsequentHeaderError("missing or bad subsequent header")
   1146 
   1147         # Patch the TarInfo object from the next header with
   1148         # the longname information.
   1149         next.offset = self.offset
   1150         if self.type == GNUTYPE_LONGNAME:
   1151             next.name = nts(buf, tarfile.encoding, tarfile.errors)
   1152         elif self.type == GNUTYPE_LONGLINK:
   1153             next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
   1154 
   1155         return next
   1156 
   1157     def _proc_sparse(self, tarfile):
   1158         """Process a GNU sparse header plus extra headers.
   1159         """
   1160         # We already collected some sparse structures in frombuf().
   1161         structs, isextended, origsize = self._sparse_structs
   1162         del self._sparse_structs
   1163 
   1164         # Collect sparse structures from extended header blocks.
   1165         while isextended:
   1166             buf = tarfile.fileobj.read(BLOCKSIZE)
   1167             pos = 0
   1168             for i in range(21):
   1169                 try:
   1170                     offset = nti(buf[pos:pos + 12])
   1171                     numbytes = nti(buf[pos + 12:pos + 24])
   1172                 except ValueError:
   1173                     break
   1174                 if offset and numbytes:
   1175                     structs.append((offset, numbytes))
   1176                 pos += 24
   1177             isextended = bool(buf[504])
   1178         self.sparse = structs
   1179 
   1180         self.offset_data = tarfile.fileobj.tell()
   1181         tarfile.offset = self.offset_data + self._block(self.size)
   1182         self.size = origsize
   1183         return self
   1184 
   1185     def _proc_pax(self, tarfile):
   1186         """Process an extended or global header as described in
   1187            POSIX.1-2008.
   1188         """
   1189         # Read the header information.
   1190         buf = tarfile.fileobj.read(self._block(self.size))
   1191 
   1192         # A pax header stores supplemental information for either
   1193         # the following file (extended) or all following files
   1194         # (global).
   1195         if self.type == XGLTYPE:
   1196             pax_headers = tarfile.pax_headers
   1197         else:
   1198             pax_headers = tarfile.pax_headers.copy()
   1199 
   1200         # Check if the pax header contains a hdrcharset field. This tells us
   1201         # the encoding of the path, linkpath, uname and gname fields. Normally,
   1202         # these fields are UTF-8 encoded, but since POSIX.1-2008, tar
   1203         # implementations are allowed to store them as raw binary strings if
   1204         # the translation to UTF-8 fails.
   1205         match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
   1206         if match is not None:
   1207             pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
   1208 
   1209         # For the time being, we don't care about anything other than "BINARY".
   1210         # The only other value that is currently allowed by the standard is
   1211         # "ISO-IR 10646 2000 UTF-8", in other words UTF-8.
   1212         hdrcharset = pax_headers.get("hdrcharset")
   1213         if hdrcharset == "BINARY":
   1214             encoding = tarfile.encoding
   1215         else:
   1216             encoding = "utf-8"
   1217 
   1218         # Parse pax header information. A record looks like this:
   1219         # "%d %s=%s\n" % (length, keyword, value). length is the size
   1220         # of the complete record including the length field itself and
   1221         # the newline. keyword and value are both UTF-8 encoded strings.
   1222         regex = re.compile(br"(\d+) ([^=]+)=")
   1223         pos = 0
   1224         while True:
   1225             match = regex.match(buf, pos)
   1226             if not match:
   1227                 break
   1228 
   1229             length, keyword = match.groups()
   1230             length = int(length)
   1231             value = buf[match.end(2) + 1:match.start(1) + length - 1]
   1232 
   1233             # Normally, we could just use "utf-8" as the encoding and "strict"
   1234             # as the error handler, but we better not take the risk. For
   1235             # example, GNU tar <= 1.23 is known to store filenames it cannot
   1236             # translate to UTF-8 as raw strings (unfortunately without a
   1237             # hdrcharset=BINARY header).
   1238             # We first try the strict standard encoding, and if that fails we
   1239             # fall back on the user's encoding and error handler.
   1240             keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
   1241                     tarfile.errors)
   1242             if keyword in PAX_NAME_FIELDS:
   1243                 value = self._decode_pax_field(value, encoding, tarfile.encoding,
   1244                         tarfile.errors)
   1245             else:
   1246                 value = self._decode_pax_field(value, "utf-8", "utf-8",
   1247                         tarfile.errors)
   1248 
   1249             pax_headers[keyword] = value
   1250             pos += length
   1251 
   1252         # Fetch the next header.
   1253         try:
   1254             next = self.fromtarfile(tarfile)
   1255         except HeaderError:
   1256             raise SubsequentHeaderError("missing or bad subsequent header")
   1257 
   1258         # Process GNU sparse information.
   1259         if "GNU.sparse.map" in pax_headers:
   1260             # GNU extended sparse format version 0.1.
   1261             self._proc_gnusparse_01(next, pax_headers)
   1262 
   1263         elif "GNU.sparse.size" in pax_headers:
   1264             # GNU extended sparse format version 0.0.
   1265             self._proc_gnusparse_00(next, pax_headers, buf)
   1266 
   1267         elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
   1268             # GNU extended sparse format version 1.0.
   1269             self._proc_gnusparse_10(next, pax_headers, tarfile)
   1270 
   1271         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
   1272             # Patch the TarInfo object with the extended header info.
   1273             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
   1274             next.offset = self.offset
   1275 
   1276             if "size" in pax_headers:
   1277                 # If the extended header replaces the size field,
   1278                 # we need to recalculate the offset where the next
   1279                 # header starts.
   1280                 offset = next.offset_data
   1281                 if next.isreg() or next.type not in SUPPORTED_TYPES:
   1282                     offset += next._block(next.size)
   1283                 tarfile.offset = offset
   1284 
   1285         return next
   1286 
   1287     def _proc_gnusparse_00(self, next, pax_headers, buf):
   1288         """Process a GNU tar extended sparse header, version 0.0.
   1289         """
   1290         offsets = []
   1291         for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
   1292             offsets.append(int(match.group(1)))
   1293         numbytes = []
   1294         for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
   1295             numbytes.append(int(match.group(1)))
   1296         next.sparse = list(zip(offsets, numbytes))
   1297 
   1298     def _proc_gnusparse_01(self, next, pax_headers):
   1299         """Process a GNU tar extended sparse header, version 0.1.
   1300         """
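        # The map is a flat comma-separated list of offset/size pairs, e.g.
        # "0,512,1048576,512" becomes [(0, 512), (1048576, 512)].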
   1301         sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
   1302         next.sparse = list(zip(sparse[::2], sparse[1::2]))
   1303 
   1304     def _proc_gnusparse_10(self, next, pax_headers, tarfile):
   1305         """Process a GNU tar extended sparse header, version 1.0.
   1306         """
   1307         fields = None
   1308         sparse = []
   1309         buf = tarfile.fileobj.read(BLOCKSIZE)
   1310         fields, buf = buf.split(b"\n", 1)
   1311         fields = int(fields)
   1312         while len(sparse) < fields * 2:
   1313             if b"\n" not in buf:
   1314                 buf += tarfile.fileobj.read(BLOCKSIZE)
   1315             number, buf = buf.split(b"\n", 1)
   1316             sparse.append(int(number))
   1317         next.offset_data = tarfile.fileobj.tell()
   1318         next.sparse = list(zip(sparse[::2], sparse[1::2]))
   1319 
   1320     def _apply_pax_info(self, pax_headers, encoding, errors):
   1321         """Replace fields with supplemental information from a previous
   1322            pax extended or global header.
   1323         """
   1324         for keyword, value in pax_headers.items():
   1325             if keyword == "GNU.sparse.name":
   1326                 setattr(self, "path", value)
   1327             elif keyword == "GNU.sparse.size":
   1328                 setattr(self, "size", int(value))
   1329             elif keyword == "GNU.sparse.realsize":
   1330                 setattr(self, "size", int(value))
   1331             elif keyword in PAX_FIELDS:
   1332                 if keyword in PAX_NUMBER_FIELDS:
   1333                     try:
   1334                         value = PAX_NUMBER_FIELDS[keyword](value)
   1335                     except ValueError:
   1336                         value = 0
   1337                 if keyword == "path":
   1338                     value = value.rstrip("/")
   1339                 setattr(self, keyword, value)
   1340 
   1341         self.pax_headers = pax_headers.copy()
   1342 
   1343     def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
   1344         """Decode a single field from a pax record.
   1345         """
   1346         try:
   1347             return value.decode(encoding, "strict")
   1348         except UnicodeDecodeError:
   1349             return value.decode(fallback_encoding, fallback_errors)
   1350 
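            # A small sketch of the strict-then-fallback decoding implemented
            # above, using a byte string that is not valid UTF-8 (the sample
            # bytes are made up):
            #
            #     raw = b"caf\xe9"                 # latin-1 encoded, not UTF-8
            #     try:
            #         text = raw.decode("utf-8", "strict")
            #     except UnicodeDecodeError:
            #         text = raw.decode("utf-8", "surrogateescape")
            #     # text == "caf\udce9"; the undecodable byte survives as a surrogate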
   1351     def _block(self, count):
   1352         """Round up a byte count by BLOCKSIZE and return it,
   1353            e.g. _block(834) => 1024.
   1354         """
   1355         blocks, remainder = divmod(count, BLOCKSIZE)
   1356         if remainder:
   1357             blocks += 1
   1358         return blocks * BLOCKSIZE
   1359 
   1360     def isreg(self):
   1361         return self.type in REGULAR_TYPES
   1362     def isfile(self):
   1363         return self.isreg()
   1364     def isdir(self):
   1365         return self.type == DIRTYPE
   1366     def issym(self):
   1367         return self.type == SYMTYPE
   1368     def islnk(self):
   1369         return self.type == LNKTYPE
   1370     def ischr(self):
   1371         return self.type == CHRTYPE
   1372     def isblk(self):
   1373         return self.type == BLKTYPE
   1374     def isfifo(self):
   1375         return self.type == FIFOTYPE
   1376     def issparse(self):
   1377         return self.sparse is not None
   1378     def isdev(self):
   1379         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
   1380 # class TarInfo
   1381 
   1382 class TarFile(object):
   1383     """The TarFile class provides an interface to tar archives.
   1384     """
   1385 
   1386     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
   1387 
   1388     dereference = False         # If true, add content of linked file to the
   1389                                 # tar file, else the link.
   1390 
   1391     ignore_zeros = False        # If true, skips empty or invalid blocks and
   1392                                 # continues processing.
   1393 
   1394     errorlevel = 1              # If 0, fatal errors only appear in debug
   1395                                 # messages (if debug >= 0). If > 0, errors
   1396                                 # are passed to the caller as exceptions.
   1397 
   1398     format = DEFAULT_FORMAT     # The format to use when creating an archive.
   1399 
   1400     encoding = ENCODING         # Encoding for 8-bit character strings.
   1401 
   1402     errors = None               # Error handler for unicode conversion.
   1403 
   1404     tarinfo = TarInfo           # The default TarInfo class to use.
   1405 
   1406     fileobject = ExFileObject   # The file-object for extractfile().
   1407 
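            # The attributes above are defaults that can be overridden per instance
            # by passing them as keyword arguments to open()/TarFile(); a brief
            # sketch (the archive and directory names are hypothetical):
            #
            #     import tarfile
            #     with tarfile.open("backup.tar", "w", format=tarfile.GNU_FORMAT,
            #                       dereference=True, errorlevel=2) as tar:
            #         tar.add("data")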
   1408     def __init__(self, name=None, mode="r", fileobj=None, format=None,
   1409             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
   1410             errors="surrogateescape", pax_headers=None, debug=None,
   1411             errorlevel=None, copybufsize=None):
   1412         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
   1413            read from an existing archive, 'a' to append data to an existing
   1414            file, 'w' to create a new file overwriting an existing one, or 'x'
   1415            to create a new file only if it does not already exist. `mode' defaults
   1416            to 'r'. If `fileobj' is given, it is used for reading or writing data.
   1417            If it can be determined, `mode' is overridden by `fileobj's mode.
   1418            `fileobj' is not closed when TarFile is closed.
   1419         """
   1420         modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
   1421         if mode not in modes:
   1422             raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
   1423         self.mode = mode
   1424         self._mode = modes[mode]
   1425 
   1426         if not fileobj:
   1427             if self.mode == "a" and not os.path.exists(name):
   1428                 # Create nonexistent files in append mode.
   1429                 self.mode = "w"
   1430                 self._mode = "wb"
   1431             fileobj = bltn_open(name, self._mode)
   1432             self._extfileobj = False
   1433         else:
   1434             if (name is None and hasattr(fileobj, "name") and
   1435                 isinstance(fileobj.name, (str, bytes))):
   1436                 name = fileobj.name
   1437             if hasattr(fileobj, "mode"):
   1438                 self._mode = fileobj.mode
   1439             self._extfileobj = True
   1440         self.name = os.path.abspath(name) if name else None
   1441         self.fileobj = fileobj
   1442 
   1443         # Init attributes.
   1444         if format is not None:
   1445             self.format = format
   1446         if tarinfo is not None:
   1447             self.tarinfo = tarinfo
   1448         if dereference is not None:
   1449             self.dereference = dereference
   1450         if ignore_zeros is not None:
   1451             self.ignore_zeros = ignore_zeros
   1452         if encoding is not None:
   1453             self.encoding = encoding
   1454         self.errors = errors
   1455 
   1456         if pax_headers is not None and self.format == PAX_FORMAT:
   1457             self.pax_headers = pax_headers
   1458         else:
   1459             self.pax_headers = {}
   1460 
   1461         if debug is not None:
   1462             self.debug = debug
   1463         if errorlevel is not None:
   1464             self.errorlevel = errorlevel
   1465 
   1466         # Init datastructures.
   1467         self.copybufsize = copybufsize
   1468         self.closed = False
   1469         self.members = []       # list of members as TarInfo objects
   1470         self._loaded = False    # flag if all members have been read
   1471         self.offset = self.fileobj.tell()
   1472                                 # current position in the archive file
   1473         self.inodes = {}        # dictionary caching the inodes of
   1474                                 # archive members already added
   1475 
   1476         try:
   1477             if self.mode == "r":
   1478                 self.firstmember = None
   1479                 self.firstmember = self.next()
   1480 
   1481             if self.mode == "a":
   1482                 # Move to the end of the archive,
   1483                 # before the first empty block.
   1484                 while True:
   1485                     self.fileobj.seek(self.offset)
   1486                     try:
   1487                         tarinfo = self.tarinfo.fromtarfile(self)
   1488                         self.members.append(tarinfo)
   1489                     except EOFHeaderError:
   1490                         self.fileobj.seek(self.offset)
   1491                         break
   1492                     except HeaderError as e:
   1493                         raise ReadError(str(e))
   1494 
   1495             if self.mode in ("a", "w", "x"):
   1496                 self._loaded = True
   1497 
   1498                 if self.pax_headers:
   1499                     buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
   1500                     self.fileobj.write(buf)
   1501                     self.offset += len(buf)
   1502         except:
   1503             if not self._extfileobj:
   1504                 self.fileobj.close()
   1505             self.closed = True
   1506             raise
   1507 
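            # A short sketch of the `fileobj' path described in the docstring above:
            # an archive can be written to and read back from an in-memory buffer,
            # which is not closed when the TarFile is closed:
            #
            #     import io, tarfile
            #     buf = io.BytesIO()
            #     with tarfile.open(fileobj=buf, mode="w") as tar:
            #         pass                          # add members here
            #     buf.seek(0)
            #     with tarfile.open(fileobj=buf, mode="r:") as tar:
            #         print(tar.getnames())         # [] for the empty archive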
   1508     #--------------------------------------------------------------------------
   1509     # Below are the classmethods which act as alternate constructors to the
   1510     # TarFile class. The open() method is the only one that is needed for
   1511     # public use; it is the "super"-constructor and is able to select an
   1512     # adequate "sub"-constructor for a particular compression using the mapping
   1513     # from OPEN_METH.
   1514     #
   1515     # This concept allows one to subclass TarFile without losing the comfort of
   1516     # the super-constructor. A sub-constructor is registered and made available
   1517     # by adding it to the mapping in OPEN_METH.
   1518 
   1519     @classmethod
   1520     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
   1521         """Open a tar archive for reading, writing or appending. Return
   1522            an appropriate TarFile class.
   1523 
   1524            mode:
   1525            'r' or 'r:*' open for reading with transparent compression
   1526            'r:'         open for reading exclusively uncompressed
   1527            'r:gz'       open for reading with gzip compression
   1528            'r:bz2'      open for reading with bzip2 compression
   1529            'r:xz'       open for reading with lzma compression
   1530            'a' or 'a:'  open for appending, creating the file if necessary
   1531            'w' or 'w:'  open for writing without compression
   1532            'w:gz'       open for writing with gzip compression
   1533            'w:bz2'      open for writing with bzip2 compression
   1534            'w:xz'       open for writing with lzma compression
   1535 
   1536            'x' or 'x:'  create a tarfile exclusively without compression, raise
   1537                         an exception if the file already exists
   1538            'x:gz'       create a gzip compressed tarfile, raise an exception
   1539                         if the file already exists
   1540            'x:bz2'      create a bzip2 compressed tarfile, raise an exception
   1541                         if the file already exists
   1542            'x:xz'       create an lzma compressed tarfile, raise an exception
   1543                         if the file already exists
   1544 
   1545            'r|*'        open a stream of tar blocks with transparent compression
   1546            'r|'         open an uncompressed stream of tar blocks for reading
   1547            'r|gz'       open a gzip compressed stream of tar blocks
   1548            'r|bz2'      open a bzip2 compressed stream of tar blocks
   1549            'r|xz'       open an lzma compressed stream of tar blocks
   1550            'w|'         open an uncompressed stream for writing
   1551            'w|gz'       open a gzip compressed stream for writing
   1552            'w|bz2'      open a bzip2 compressed stream for writing
   1553            'w|xz'       open an lzma compressed stream for writing
   1554         """
   1555 
   1556         if not name and not fileobj:
   1557             raise ValueError("nothing to open")
   1558 
   1559         if mode in ("r", "r:*"):
   1560             # Find out which *open() is appropriate for opening the file.
   1561             def not_compressed(comptype):
   1562                 return cls.OPEN_METH[comptype] == 'taropen'
   1563             for comptype in sorted(cls.OPEN_METH, key=not_compressed):
   1564                 func = getattr(cls, cls.OPEN_METH[comptype])
   1565                 if fileobj is not None:
   1566                     saved_pos = fileobj.tell()
   1567                 try:
   1568                     return func(name, "r", fileobj, **kwargs)
   1569                 except (ReadError, CompressionError):
   1570                     if fileobj is not None:
   1571                         fileobj.seek(saved_pos)
   1572                     continue
   1573             raise ReadError("file could not be opened successfully")
   1574 
   1575         elif ":" in mode:
   1576             filemode, comptype = mode.split(":", 1)
   1577             filemode = filemode or "r"
   1578             comptype = comptype or "tar"
   1579 
   1580             # Select the *open() function according to
   1581             # given compression.
   1582             if comptype in cls.OPEN_METH:
   1583                 func = getattr(cls, cls.OPEN_METH[comptype])
   1584             else:
   1585                 raise CompressionError("unknown compression type %r" % comptype)
   1586             return func(name, filemode, fileobj, **kwargs)
   1587 
   1588         elif "|" in mode:
   1589             filemode, comptype = mode.split("|", 1)
   1590             filemode = filemode or "r"
   1591             comptype = comptype or "tar"
   1592 
   1593             if filemode not in ("r", "w"):
   1594                 raise ValueError("mode must be 'r' or 'w'")
   1595 
   1596             stream = _Stream(name, filemode, comptype, fileobj, bufsize)
   1597             try:
   1598                 t = cls(name, filemode, stream, **kwargs)
   1599             except:
   1600                 stream.close()
   1601                 raise
   1602             t._extfileobj = False
   1603             return t
   1604 
   1605         elif mode in ("a", "w", "x"):
   1606             return cls.taropen(name, mode, fileobj, **kwargs)
   1607 
   1608         raise ValueError("undiscernible mode")
   1609 
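            # A brief sketch of the mode strings documented above (archive and
            # member names are hypothetical):
            #
            #     import tarfile
            #     with tarfile.open("data.tar.gz", "w:gz") as tar:   # gzip-compressed
            #         tar.add("somefile.txt")
            #     with tarfile.open("data.tar.gz", "r:*") as tar:    # auto-detect
            #         for member in tar:
            #             print(member.name)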
   1610     @classmethod
   1611     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
   1612         """Open uncompressed tar archive name for reading or writing.
   1613         """
   1614         if mode not in ("r", "a", "w", "x"):
   1615             raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
   1616         return cls(name, mode, fileobj, **kwargs)
   1617 
   1618     @classmethod
   1619     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
   1620         """Open gzip compressed tar archive name for reading or writing.
   1621            Appending is not allowed.
   1622         """
   1623         if mode not in ("r", "w", "x"):
   1624             raise ValueError("mode must be 'r', 'w' or 'x'")
   1625 
   1626         try:
   1627             import gzip
   1628             gzip.GzipFile
   1629         except (ImportError, AttributeError):
   1630             raise CompressionError("gzip module is not available")
   1631 
   1632         try:
   1633             fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
   1634         except OSError:
   1635             if fileobj is not None and mode == 'r':
   1636                 raise ReadError("not a gzip file")
   1637             raise
   1638 
   1639         try:
   1640             t = cls.taropen(name, mode, fileobj, **kwargs)
   1641         except OSError:
   1642             fileobj.close()
   1643             if mode == 'r':
   1644                 raise ReadError("not a gzip file")
   1645             raise
   1646         except:
   1647             fileobj.close()
   1648             raise
   1649         t._extfileobj = False
   1650         return t
   1651 
   1652     @classmethod
   1653     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
   1654         """Open bzip2 compressed tar archive name for reading or writing.
   1655            Appending is not allowed.
   1656         """
   1657         if mode not in ("r", "w", "x"):
   1658             raise ValueError("mode must be 'r', 'w' or 'x'")
   1659 
   1660         try:
   1661             import bz2
   1662         except ImportError:
   1663             raise CompressionError("bz2 module is not available")
   1664 
   1665         fileobj = bz2.BZ2File(fileobj or name, mode,
   1666                               compresslevel=compresslevel)
   1667 
   1668         try:
   1669             t = cls.taropen(name, mode, fileobj, **kwargs)
   1670         except (OSError, EOFError):
   1671             fileobj.close()
   1672             if mode == 'r':
   1673                 raise ReadError("not a bzip2 file")
   1674             raise
   1675         except:
   1676             fileobj.close()
   1677             raise
   1678         t._extfileobj = False
   1679         return t
   1680 
   1681     @classmethod
   1682     def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
   1683         """Open lzma compressed tar archive name for reading or writing.
   1684            Appending is not allowed.
   1685         """
   1686         if mode not in ("r", "w", "x"):
   1687             raise ValueError("mode must be 'r', 'w' or 'x'")
   1688 
   1689         try:
   1690             import lzma
   1691         except ImportError:
   1692             raise CompressionError("lzma module is not available")
   1693 
   1694         fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
   1695 
   1696         try:
   1697             t = cls.taropen(name, mode, fileobj, **kwargs)
   1698         except (lzma.LZMAError, EOFError):
   1699             fileobj.close()
   1700             if mode == 'r':
   1701                 raise ReadError("not an lzma file")
   1702             raise
   1703         except:
   1704             fileobj.close()
   1705             raise
   1706         t._extfileobj = False
   1707         return t
   1708 
   1709     # All *open() methods are registered here.
   1710     OPEN_METH = {
   1711         "tar": "taropen",   # uncompressed tar
   1712         "gz":  "gzopen",    # gzip compressed tar
   1713         "bz2": "bz2open",   # bzip2 compressed tar
   1714         "xz":  "xzopen"     # lzma compressed tar
   1715     }
   1716 
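            # A subclassing sketch based on the registration mechanism described
            # above: a hypothetical compression keyword ("foo") is mapped to a
            # hypothetical classmethod fooopen() on a TarFile subclass, after which
            # mode strings such as "r:foo" resolve to it through open().
            #
            #     class FooTarFile(TarFile):
            #         @classmethod
            #         def fooopen(cls, name, mode="r", fileobj=None, **kwargs):
            #             ...  # wrap name/fileobj in a decompressing file object here
            #             return cls.taropen(name, mode, fileobj, **kwargs)
            #
            #         OPEN_METH = {"foo": "fooopen", **TarFile.OPEN_METH}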
   1717     #--------------------------------------------------------------------------
   1718     # The public methods which TarFile provides:
   1719 
   1720     def close(self):
   1721         """Close the TarFile. In write-mode, two finishing zero blocks are
   1722            appended to the archive.
   1723         """
   1724         if self.closed:
   1725             return
   1726 
   1727         self.closed = True
   1728         try:
   1729             if self.mode in ("a", "w", "x"):
   1730                 self.fileobj.write(NUL * (BLOCKSIZE * 2))
   1731                 self.offset += (BLOCKSIZE * 2)
   1732                 # fill up the end with zero-blocks
   1733                 # (like option -b20 for tar does)
   1734                 blocks, remainder = divmod(self.offset, RECORDSIZE)
   1735                 if remainder > 0:
   1736                     self.fileobj.write(NUL * (RECORDSIZE - remainder))
   1737         finally:
   1738             if not self._extfileobj:
   1739                 self.fileobj.close()
   1740 
   1741     def getmember(self, name):
   1742         """Return a TarInfo object for member `name'. If `name' cannot be
   1743            found in the archive, KeyError is raised. If a member occurs more
   1744            than once in the archive, its last occurrence is assumed to be the
   1745            most up-to-date version.
   1746         """
   1747         tarinfo = self._getmember(name)
   1748         if tarinfo is None:
   1749             raise KeyError("filename %r not found" % name)
   1750         return tarinfo
   1751 
   1752     def getmembers(self):
   1753         """Return the members of the archive as a list of TarInfo objects. The
   1754            list has the same order as the members in the archive.
   1755         """
   1756         self._check()
   1757         if not self._loaded:    # if we want to obtain a list of
   1758             self._load()        # all members, we first have to
   1759                                 # scan the whole archive.
   1760         return self.members
   1761 
   1762     def getnames(self):
   1763         """Return the members of the archive as a list of their names. It has
   1764            the same order as the list returned by getmembers().
   1765         """
   1766         return [tarinfo.name for tarinfo in self.getmembers()]
   1767 
   1768     def gettarinfo(self, name=None, arcname=None, fileobj=None):
   1769         """Create a TarInfo object from the result of os.stat or equivalent
   1770            on an existing file. The file is either named by `name', or
   1771            specified as a file object `fileobj' with a file descriptor. If
   1772            given, `arcname' specifies an alternative name for the file in the
   1773            archive; otherwise, the name is taken from the 'name' attribute of
   1774            'fileobj', or the 'name' argument. The name should be a text
   1775            string.
   1776         """
   1777         self._check("awx")
   1778 
   1779         # When fileobj is given, replace name by
   1780         # fileobj's real name.
   1781         if fileobj is not None:
   1782             name = fileobj.name
   1783 
   1784         # Build the name of the member in the archive.
   1785         # Backward slashes are converted to forward slashes;
   1786         # absolute paths are turned into relative paths.
   1787         if arcname is None:
   1788             arcname = name
   1789         drv, arcname = os.path.splitdrive(arcname)
   1790         arcname = arcname.replace(os.sep, "/")
   1791         arcname = arcname.lstrip("/")
   1792 
   1793         # Now, fill the TarInfo object with
   1794         # information specific for the file.
   1795         tarinfo = self.tarinfo()
   1796         tarinfo.tarfile = self  # Not needed
   1797 
   1798         # Use os.stat or os.lstat, depending on platform
   1799         # and if symlinks shall be resolved.
   1800         if fileobj is None:
   1801             if hasattr(os, "lstat") and not self.dereference:
   1802                 statres = os.lstat(name)
   1803             else:
   1804                 statres = os.stat(name)
   1805         else:
   1806             statres = os.fstat(fileobj.fileno())
   1807         linkname = ""
   1808 
   1809         stmd = statres.st_mode
   1810         if stat.S_ISREG(stmd):
   1811             inode = (statres.st_ino, statres.st_dev)
   1812             if not self.dereference and statres.st_nlink > 1 and \
   1813                     inode in self.inodes and arcname != self.inodes[inode]:
   1814                 # Is it a hardlink to an already
   1815                 # archived file?
   1816                 type = LNKTYPE
   1817                 linkname = self.inodes[inode]
   1818             else:
   1819                 # The inode is added only if its valid.
   1820                 # The inode is added only if it's valid.
   1821                 type = REGTYPE
   1822                 if inode[0]:
   1823                     self.inodes[inode] = arcname
   1824         elif stat.S_ISDIR(stmd):
   1825             type = DIRTYPE
   1826         elif stat.S_ISFIFO(stmd):
   1827             type = FIFOTYPE
   1828         elif stat.S_ISLNK(stmd):
   1829             type = SYMTYPE
   1830             linkname = os.readlink(name)
   1831         elif stat.S_ISCHR(stmd):
   1832             type = CHRTYPE
   1833         elif stat.S_ISBLK(stmd):
   1834             type = BLKTYPE
   1835         else:
   1836             return None
   1837 
   1838         # Fill the TarInfo object with all
   1839         # information we can get.
   1840         tarinfo.name = arcname
   1841         tarinfo.mode = stmd
   1842         tarinfo.uid = statres.st_uid
   1843         tarinfo.gid = statres.st_gid
   1844         if type == REGTYPE:
   1845             tarinfo.size = statres.st_size
   1846         else:
   1847             tarinfo.size = 0
   1848         tarinfo.mtime = statres.st_mtime
   1849         tarinfo.type = type
   1850         tarinfo.linkname = linkname
   1851         if pwd:
   1852             try:
   1853                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
   1854             except KeyError:
   1855                 pass
   1856         if grp:
   1857             try:
   1858                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
   1859             except KeyError:
   1860                 pass
   1861 
   1862         if type in (CHRTYPE, BLKTYPE):
   1863             if hasattr(os, "major") and hasattr(os, "minor"):
   1864                 tarinfo.devmajor = os.major(statres.st_rdev)
   1865                 tarinfo.devminor = os.minor(statres.st_rdev)
   1866         return tarinfo
   1867 
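            # A short sketch of using gettarinfo() to build a header and adjust it
            # before adding the data (file and member names are hypothetical):
            #
            #     import tarfile
            #     with tarfile.open("out.tar", "w") as tar:
            #         with open("report.txt", "rb") as f:
            #             info = tar.gettarinfo(fileobj=f, arcname="docs/report.txt")
            #             info.uname = info.gname = "root"
            #             tar.addfile(info, f)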
   1868     def list(self, verbose=True, *, members=None):
   1869         """Print a table of contents to sys.stdout. If `verbose' is False, only
   1870            the names of the members are printed. If it is True, an `ls -l'-like
   1871            output is produced. `members' is optional and must be a subset of the
   1872            list returned by getmembers().
   1873         """
   1874         self._check()
   1875 
   1876         if members is None:
   1877             members = self
   1878         for tarinfo in members:
   1879             if verbose:
   1880                 _safe_print(stat.filemode(tarinfo.mode))
   1881                 _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
   1882                                        tarinfo.gname or tarinfo.gid))
   1883                 if tarinfo.ischr() or tarinfo.isblk():
   1884                     _safe_print("%10s" %
   1885                             ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
   1886                 else:
   1887                     _safe_print("%10d" % tarinfo.size)
   1888                 _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
   1889                             % time.localtime(tarinfo.mtime)[:6])
   1890 
   1891             _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
   1892 
   1893             if verbose:
   1894                 if tarinfo.issym():
   1895                     _safe_print("-> " + tarinfo.linkname)
   1896                 if tarinfo.islnk():
   1897                     _safe_print("link to " + tarinfo.linkname)
   1898             print()
   1899 
   1900     def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
   1901         """Add the file `name' to the archive. `name' may be any type of file
   1902            (directory, fifo, symbolic link, etc.). If given, `arcname'
   1903            specifies an alternative name for the file in the archive.
   1904            Directories are added recursively by default. This can be avoided by
   1905            setting `recursive' to False. `exclude' is a deprecated function that
   1906            should return True for each filename to be excluded; use `filter'
   1907            instead. `filter' is a function that expects a TarInfo object argument
   1908            and returns the changed TarInfo object; if it returns None, the
   1909            TarInfo object will be excluded from the archive.
   1910         """
   1911         self._check("awx")
   1912 
   1913         if arcname is None:
   1914             arcname = name
   1915 
   1916         # Exclude pathnames.
   1917         if exclude is not None:
   1918             import warnings
   1919             warnings.warn("use the filter argument instead",
   1920                     DeprecationWarning, 2)
   1921             if exclude(name):
   1922                 self._dbg(2, "tarfile: Excluded %r" % name)
   1923                 return
   1924 
   1925         # Skip if somebody tries to archive the archive...
   1926         if self.name is not None and os.path.abspath(name) == self.name:
   1927             self._dbg(2, "tarfile: Skipped %r" % name)
   1928             return
   1929 
   1930         self._dbg(1, name)
   1931 
   1932         # Create a TarInfo object from the file.
   1933         tarinfo = self.gettarinfo(name, arcname)
   1934 
   1935         if tarinfo is None:
   1936             self._dbg(1, "tarfile: Unsupported type %r" % name)
   1937             return
   1938 
   1939         # Change or exclude the TarInfo object.
   1940         if filter is not None:
   1941             tarinfo = filter(tarinfo)
   1942             if tarinfo is None:
   1943                 self._dbg(2, "tarfile: Excluded %r" % name)
   1944                 return
   1945 
   1946         # Append the tar header and data to the archive.
   1947         if tarinfo.isreg():
   1948             with bltn_open(name, "rb") as f:
   1949                 self.addfile(tarinfo, f)
   1950 
   1951         elif tarinfo.isdir():
   1952             self.addfile(tarinfo)
   1953             if recursive:
   1954                 for f in os.listdir(name):
   1955                     self.add(os.path.join(name, f), os.path.join(arcname, f),
   1956                             recursive, exclude, filter=filter)
   1957 
   1958         else:
   1959             self.addfile(tarinfo)
   1960 
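            # A brief sketch of the `filter' hook described above: the callable can
            # rewrite or drop members (directory and file names are hypothetical):
            #
            #     def anonymize(tarinfo):
            #         if tarinfo.name.endswith(".log"):
            #             return None                       # exclude log files
            #         tarinfo.uid = tarinfo.gid = 0
            #         tarinfo.uname = tarinfo.gname = "root"
            #         return tarinfo
            #
            #     import tarfile
            #     with tarfile.open("project.tar.gz", "w:gz") as tar:
            #         tar.add("project", filter=anonymize)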
   1961     def addfile(self, tarinfo, fileobj=None):
   1962         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
   1963            given, it should be a binary file, and tarinfo.size bytes are read
   1964            from it and added to the archive. You can create TarInfo objects
   1965            directly, or by using gettarinfo().
   1966         """
   1967         self._check("awx")
   1968 
   1969         tarinfo = copy.copy(tarinfo)
   1970 
   1971         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
   1972         self.fileobj.write(buf)
   1973         self.offset += len(buf)
   1974         bufsize = self.copybufsize
   1975         # If there's data to follow, append it.
   1976         if fileobj is not None:
   1977             copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
   1978             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
   1979             if remainder > 0:
   1980                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
   1981                 blocks += 1
   1982             self.offset += blocks * BLOCKSIZE
   1983 
   1984         self.members.append(tarinfo)
   1985 
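            # A small sketch of addfile() with a hand-made TarInfo and in-memory
            # data, as described above (member name and payload are hypothetical):
            #
            #     import io, time, tarfile
            #     payload = b"hello world\n"
            #     info = tarfile.TarInfo(name="greeting.txt")
            #     info.size = len(payload)
            #     info.mtime = time.time()
            #     with tarfile.open("memdata.tar", "w") as tar:
            #         tar.addfile(info, io.BytesIO(payload))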
   1986     def extractall(self, path=".", members=None, *, numeric_owner=False):
   1987         """Extract all members from the archive to the current working
   1988            directory and set owner, modification time and permissions on
   1989            directories afterwards. `path' specifies a different directory
   1990            to extract to. `members' is optional and must be a subset of the
   1991            list returned by getmembers(). If `numeric_owner` is True, only
   1992            the numbers for user/group names are used and not the names.
   1993         """
   1994         directories = []
   1995 
   1996         if members is None:
   1997             members = self
   1998 
   1999         for tarinfo in members:
   2000             if tarinfo.isdir():
   2001                 # Extract directories with a safe mode.
   2002                 directories.append(tarinfo)
   2003                 tarinfo = copy.copy(tarinfo)
   2004                 tarinfo.mode = 0o700
   2005             # Do not set attributes on directories now; we do that further down.
   2006             self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
   2007                          numeric_owner=numeric_owner)
   2008 
   2009         # Reverse sort directories.
   2010         directories.sort(key=lambda a: a.name)
   2011         directories.reverse()
   2012 
   2013         # Set correct owner, mtime and filemode on directories.
   2014         for tarinfo in directories:
   2015             dirpath = os.path.join(path, tarinfo.name)
   2016             try:
   2017                 self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
   2018                 self.utime(tarinfo, dirpath)
   2019                 self.chmod(tarinfo, dirpath)
   2020             except ExtractError as e:
   2021                 if self.errorlevel > 1:
   2022                     raise
   2023                 else:
   2024                     self._dbg(1, "tarfile: %s" % e)
   2025 
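            # A short sketch of extractall() with a filtered member list, as
            # described above (paths are hypothetical); for archives from untrusted
            # sources, member names should be checked for absolute paths and ".."
            # components before extracting:
            #
            #     import tarfile
            #     with tarfile.open("release.tar.xz", "r:xz") as tar:
            #         docs = [m for m in tar.getmembers()
            #                 if m.name.startswith("docs/")]
            #         tar.extractall(path="unpacked", members=docs)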
   2026     def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
   2027         """Extract a member from the archive to the current working directory,
   2028            using its full name. Its file information is extracted as accurately
   2029            as possible. `member' may be a filename or a TarInfo object. You can
   2030            specify a different directory using `path'. File attributes (owner,
   2031            mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
   2032            is True, only the numbers for user/group names are used and not
   2033            the names.
   2034         """
   2035         self._check("r")
   2036 
   2037         if isinstance(member, str):
   2038             tarinfo = self.getmember(member)
   2039         else:
   2040             tarinfo = member
   2041 
   2042         # Prepare the link target for makelink().
   2043         if tarinfo.islnk():
   2044             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
   2045 
   2046         try:
   2047             self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
   2048                                  set_attrs=set_attrs,
   2049                                  numeric_owner=numeric_owner)
   2050         except OSError as e:
   2051             if self.errorlevel > 0:
   2052                 raise
   2053             else:
   2054                 if e.filename is None:
   2055                     self._dbg(1, "tarfile: %s" % e.strerror)
   2056                 else:
   2057                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
   2058         except ExtractError as e:
   2059             if self.errorlevel > 1:
   2060                 raise
   2061             else:
   2062                 self._dbg(1, "tarfile: %s" % e)
   2063 
   2064     def extractfile(self, member):
   2065         """Extract a member from the archive as a file object. `member' may be
   2066            a filename or a TarInfo object. If `member' is a regular file or a
   2067            link, an io.BufferedReader object is returned. Otherwise, None is
   2068            returned.
   2069         """
   2070         self._check("r")
   2071 
   2072         if isinstance(member, str):
   2073             tarinfo = self.getmember(member)
   2074         else:
   2075             tarinfo = member
   2076 
   2077         if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
   2078             # Members with unknown types are treated as regular files.
   2079             return self.fileobject(self, tarinfo)
   2080 
   2081         elif tarinfo.islnk() or tarinfo.issym():
   2082             if isinstance(self.fileobj, _Stream):
   2083                 # A small but ugly workaround for the case that someone tries
   2084                 # to extract a (sym)link as a file-object from a non-seekable
   2085                 # stream of tar blocks.
   2086                 raise StreamError("cannot extract (sym)link as file object")
   2087             else:
   2088                 # A (sym)link's file object is its target's file object.
   2089                 return self.extractfile(self._find_link_target(tarinfo))
   2090         else:
   2091             # If there's no data associated with the member (directory, chrdev,
   2092             # blkdev, etc.), return None instead of a file object.
   2093             return None
   2094 
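            # A minimal sketch of extractfile() as described above (archive and
            # member names are hypothetical); None is returned for members that
            # carry no data:
            #
            #     import tarfile
            #     with tarfile.open("archive.tar", "r") as tar:
            #         f = tar.extractfile("path/in/archive.txt")
            #         if f is not None:
            #             data = f.read()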
   2095     def _extract_member(self, tarinfo, targetpath, set_attrs=True,
   2096                         numeric_owner=False):
   2097         """Extract the TarInfo object tarinfo to a physical
   2098            file called targetpath.
   2099         """
   2100         # Build the destination pathname for the member,
   2101         # replacing forward slashes with platform-specific
   2102         # separators.
   2103         targetpath = targetpath.rstrip("/")
   2104         targetpath = targetpath.replace("/", os.sep)
   2105 
   2106         # Create all upper directories.
   2107         upperdirs = os.path.dirname(targetpath)
   2108         if upperdirs and not os.path.exists(upperdirs):
   2109             # Create directories that are not part of the archive with
   2110             # default permissions.
   2111             os.makedirs(upperdirs)
   2112 
   2113         if tarinfo.islnk() or tarinfo.issym():
   2114             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
   2115         else:
   2116             self._dbg(1, tarinfo.name)
   2117 
   2118         if tarinfo.isreg():
   2119             self.makefile(tarinfo, targetpath)
   2120         elif tarinfo.isdir():
   2121             self.makedir(tarinfo, targetpath)
   2122         elif tarinfo.isfifo():
   2123             self.makefifo(tarinfo, targetpath)
   2124         elif tarinfo.ischr() or tarinfo.isblk():
   2125             self.makedev(tarinfo, targetpath)
   2126         elif tarinfo.islnk() or tarinfo.issym():
   2127             self.makelink(tarinfo, targetpath)
   2128         elif tarinfo.type not in SUPPORTED_TYPES:
   2129             self.makeunknown(tarinfo, targetpath)
   2130         else:
   2131             self.makefile(tarinfo, targetpath)
   2132 
   2133         if set_attrs:
   2134             self.chown(tarinfo, targetpath, numeric_owner)
   2135             if not tarinfo.issym():
   2136                 self.chmod(tarinfo, targetpath)
   2137                 self.utime(tarinfo, targetpath)
   2138 
   2139     #--------------------------------------------------------------------------
   2140     # Below are the different file methods. They are called via
   2141     # _extract_member() when extract() is called. They can be replaced in a
   2142     # subclass to implement other functionality.
   2143 
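            # A subclassing sketch of the replacement hook mentioned above: a
            # hypothetical TarFile subclass that skips device nodes instead of
            # failing on systems where os.mknod() is unavailable or not permitted.
            #
            #     class NoDevTarFile(TarFile):
            #         def makedev(self, tarinfo, targetpath):
            #             self._dbg(1, "tarfile: skipping device node %r" % tarinfo.name)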
   2144     def makedir(self, tarinfo, targetpath):
   2145         """Make a directory called targetpath.
   2146         """
   2147         try:
   2148             # Use a safe mode for the directory, the real mode is set
   2149             # later in _extract_member().
   2150             os.mkdir(targetpath, 0o700)
   2151         except FileExistsError:
   2152             pass
   2153 
   2154     def makefile(self, tarinfo, targetpath):
   2155         """Make a file called targetpath.
   2156         """
   2157         source = self.fileobj
   2158         source.seek(tarinfo.offset_data)
   2159         bufsize = self.copybufsize
   2160         with bltn_open(targetpath, "wb") as target:
   2161             if tarinfo.sparse is not None:
   2162                 for offset, size in tarinfo.sparse:
   2163                     target.seek(offset)
   2164                     copyfileobj(source, target, size, ReadError, bufsize)
   2165                 target.seek(tarinfo.size)
   2166                 target.truncate()
   2167             else:
   2168                 copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
   2169 
   2170     def makeunknown(self, tarinfo, targetpath):
   2171         """Make a file from a TarInfo object with an unknown type
   2172            at targetpath.
   2173         """
   2174         self.makefile(tarinfo, targetpath)
   2175         self._dbg(1, "tarfile: Unknown file type %r, " \
   2176                      "extracted as regular file." % tarinfo.type)
   2177 
   2178     def makefifo(self, tarinfo, targetpath):
   2179         """Make a fifo called targetpath.
   2180         """
   2181         if hasattr(os, "mkfifo"):
   2182             os.mkfifo(targetpath)
   2183         else:
   2184             raise ExtractError("fifo not supported by system")
   2185 
   2186     def makedev(self, tarinfo, targetpath):
   2187         """Make a character or block device called targetpath.
   2188         """
   2189         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
   2190             raise ExtractError("special devices not supported by system")
   2191 
   2192         mode = tarinfo.mode
   2193         if tarinfo.isblk():
   2194             mode |= stat.S_IFBLK
   2195         else:
   2196             mode |= stat.S_IFCHR
   2197 
   2198         os.mknod(targetpath, mode,
   2199                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
   2200 
   2201     def makelink(self, tarinfo, targetpath):
   2202         """Make a (symbolic) link called targetpath. If it cannot be created
   2203            (platform limitation), we try to make a copy of the referenced file
   2204            instead of a link.
   2205         """
   2206         try:
   2207             # For systems that support symbolic and hard links.
   2208             if tarinfo.issym():
   2209                 os.symlink(tarinfo.linkname, targetpath)
   2210             else:
   2211                 # See extract().
   2212                 if os.path.exists(tarinfo._link_target):
   2213                     os.link(tarinfo._link_target, targetpath)
   2214                 else:
   2215                     self._extract_member(self._find_link_target(tarinfo),
   2216                                          targetpath)
   2217         except symlink_exception:
   2218             try:
   2219                 self._extract_member(self._find_link_target(tarinfo),
   2220                                      targetpath)
   2221             except KeyError:
   2222                 raise ExtractError("unable to resolve link inside archive")
   2223 
   2224     def chown(self, tarinfo, targetpath, numeric_owner):
   2225         """Set owner of targetpath according to tarinfo. If numeric_owner
   2226            is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
   2227            is False, fall back to .gid/.uid when the search based on name
   2228            fails.
   2229         """
   2230         if hasattr(os, "geteuid") and os.geteuid() == 0:
   2231             # We have to be root to do so.
   2232             g = tarinfo.gid
   2233             u = tarinfo.uid
   2234             if not numeric_owner:
   2235                 try:
   2236                     if grp:
   2237                         g = grp.getgrnam(tarinfo.gname)[2]
   2238                 except KeyError:
   2239                     pass
   2240                 try:
   2241                     if pwd:
   2242                         u = pwd.getpwnam(tarinfo.uname)[2]
   2243                 except KeyError:
   2244                     pass
   2245             try:
   2246                 if tarinfo.issym() and hasattr(os, "lchown"):
   2247                     os.lchown(targetpath, u, g)
   2248                 else:
   2249                     os.chown(targetpath, u, g)
   2250             except OSError:
   2251                 raise ExtractError("could not change owner")
   2252 
   2253     def chmod(self, tarinfo, targetpath):
   2254         """Set file permissions of targetpath according to tarinfo.
   2255         """
   2256         if hasattr(os, 'chmod'):
   2257             try:
   2258                 os.chmod(targetpath, tarinfo.mode)
   2259             except OSError:
   2260                 raise ExtractError("could not change mode")
   2261 
   2262     def utime(self, tarinfo, targetpath):
   2263         """Set modification time of targetpath according to tarinfo.
   2264         """
   2265         if not hasattr(os, 'utime'):
   2266             return
   2267         try:
   2268             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
   2269         except OSError:
   2270             raise ExtractError("could not change modification time")
   2271 
   2272     #--------------------------------------------------------------------------
   2273     def next(self):
   2274         """Return the next member of the archive as a TarInfo object, when
   2275            TarFile is opened for reading. Return None if there are no more
   2276            members available.
   2277         """
   2278         self._check("ra")
   2279         if self.firstmember is not None:
   2280             m = self.firstmember
   2281             self.firstmember = None
   2282             return m
   2283 
   2284         # Advance the file pointer.
   2285         if self.offset != self.fileobj.tell():
   2286             self.fileobj.seek(self.offset - 1)
   2287             if not self.fileobj.read(1):
   2288                 raise ReadError("unexpected end of data")
   2289 
   2290         # Read the next block.
   2291         tarinfo = None
   2292         while True:
   2293             try:
   2294                 tarinfo = self.tarinfo.fromtarfile(self)
   2295             except EOFHeaderError as e:
   2296                 if self.ignore_zeros:
   2297                     self._dbg(2, "0x%X: %s" % (self.offset, e))
   2298                     self.offset += BLOCKSIZE
   2299                     continue
   2300             except InvalidHeaderError as e:
   2301                 if self.ignore_zeros:
   2302                     self._dbg(2, "0x%X: %s" % (self.offset, e))
   2303                     self.offset += BLOCKSIZE
   2304                     continue
   2305                 elif self.offset == 0:
   2306                     raise ReadError(str(e))
   2307             except EmptyHeaderError:
   2308                 if self.offset == 0:
   2309                     raise ReadError("empty file")
   2310             except TruncatedHeaderError as e:
   2311                 if self.offset == 0:
   2312                     raise ReadError(str(e))
   2313             except SubsequentHeaderError as e:
   2314                 raise ReadError(str(e))
   2315             break
   2316 
   2317         if tarinfo is not None:
   2318             self.members.append(tarinfo)
   2319         else:
   2320             self._loaded = True
   2321 
   2322         return tarinfo
   2323 
   2324     #--------------------------------------------------------------------------
   2325     # Little helper methods:
   2326 
   2327     def _getmember(self, name, tarinfo=None, normalize=False):
   2328         """Find an archive member by name from bottom to top.
   2329            If tarinfo is given, it is used as the starting point.
   2330         """
   2331         # Ensure that all members have been loaded.
   2332         members = self.getmembers()
   2333 
   2334         # Limit the member search list up to tarinfo.
   2335         if tarinfo is not None:
   2336             members = members[:members.index(tarinfo)]
   2337 
   2338         if normalize:
   2339             name = os.path.normpath(name)
   2340 
   2341         for member in reversed(members):
   2342             if normalize:
   2343                 member_name = os.path.normpath(member.name)
   2344             else:
   2345                 member_name = member.name
   2346 
   2347             if name == member_name:
   2348                 return member
   2349 
   2350     def _load(self):
   2351         """Read through the entire archive file and look for readable
   2352            members.
   2353         """
   2354         while True:
   2355             tarinfo = self.next()
   2356             if tarinfo is None:
   2357                 break
   2358         self._loaded = True
   2359 
   2360     def _check(self, mode=None):
   2361         """Check if TarFile is still open, and if the operation's mode
   2362            corresponds to TarFile's mode.
   2363         """
   2364         if self.closed:
   2365             raise OSError("%s is closed" % self.__class__.__name__)
   2366         if mode is not None and self.mode not in mode:
   2367             raise OSError("bad operation for mode %r" % self.mode)
   2368 
   2369     def _find_link_target(self, tarinfo):
   2370         """Find the target member of a symlink or hardlink member in the
   2371            archive.
   2372         """
   2373         if tarinfo.issym():
   2374             # Always search the entire archive.
   2375             linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
   2376             limit = None
   2377         else:
   2378             # Search the archive before the link, because a hard link is
   2379             # just a reference to an already archived file.
   2380             linkname = tarinfo.linkname
   2381             limit = tarinfo
   2382 
   2383         member = self._getmember(linkname, tarinfo=limit, normalize=True)
   2384         if member is None:
   2385             raise KeyError("linkname %r not found" % linkname)
   2386         return member
   2387 
   2388     def __iter__(self):
   2389         """Provide an iterator object.
   2390         """
   2391         if self._loaded:
   2392             yield from self.members
   2393             return
   2394 
   2395         # Yield items using TarFile's next() method.
   2396         # When all members have been read, set TarFile as _loaded.
   2397         index = 0
   2398         # Fix for SF #1100429: Under rare circumstances it can
   2399         # happen that getmembers() is called during iteration,
   2400         # which will have already exhausted the next() method.
   2401         if self.firstmember is not None:
   2402             tarinfo = self.next()
   2403             index += 1
   2404             yield tarinfo
   2405 
   2406         while True:
   2407             if index < len(self.members):
   2408                 tarinfo = self.members[index]
   2409             elif not self._loaded:
   2410                 tarinfo = self.next()
   2411                 if not tarinfo:
   2412                     self._loaded = True
   2413                     return
   2414             else:
   2415                 return
   2416             index += 1
   2417             yield tarinfo
   2418 
   2419     def _dbg(self, level, msg):
   2420         """Write debugging output to sys.stderr.
   2421         """
   2422         if level <= self.debug:
   2423             print(msg, file=sys.stderr)
   2424 
   2425     def __enter__(self):
   2426         self._check()
   2427         return self
   2428 
   2429     def __exit__(self, type, value, traceback):
   2430         if type is None:
   2431             self.close()
   2432         else:
   2433             # An exception occurred. We must not call close() because
   2434             # it would try to write end-of-archive blocks and padding.
   2435             if not self._extfileobj:
   2436                 self.fileobj.close()
   2437             self.closed = True
   2438 
   2439 #--------------------
   2440 # exported functions
   2441 #--------------------
   2442 def is_tarfile(name):
   2443     """Return True if name points to a tar archive that we
   2444        are able to handle, else return False.
   2445     """
   2446     try:
   2447         t = open(name)
   2448         t.close()
   2449         return True
   2450     except TarError:
   2451         return False
   2452 
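        # A one-line sketch of is_tarfile() (the path is hypothetical):
        #
        #     import tarfile
        #     if tarfile.is_tarfile("download.bin"):
        #         print("looks like a tar archive")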
   2453 open = TarFile.open
   2454 
   2455 
   2456 def main():
   2457     import argparse
   2458 
   2459     description = 'A simple command-line interface for the tarfile module.'
   2460     parser = argparse.ArgumentParser(description=description)
   2461     parser.add_argument('-v', '--verbose', action='store_true', default=False,
   2462                         help='Verbose output')
   2463     group = parser.add_mutually_exclusive_group()
   2464     group.add_argument('-l', '--list', metavar='<tarfile>',
   2465                        help='Show listing of a tarfile')
   2466     group.add_argument('-e', '--extract', nargs='+',
   2467                        metavar=('<tarfile>', '<output_dir>'),
   2468                        help='Extract tarfile into target dir')
   2469     group.add_argument('-c', '--create', nargs='+',
   2470                        metavar=('<name>', '<file>'),
   2471                        help='Create tarfile from sources')
   2472     group.add_argument('-t', '--test', metavar='<tarfile>',
   2473                        help='Test if a tarfile is valid')
   2474     args = parser.parse_args()
   2475 
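            # Typical invocations of this front end when the module is run as a
            # script (all file names are hypothetical):
            #
            #     python tarfile.py -l archive.tar           # list contents
            #     python tarfile.py -t archive.tar -v        # test validity
            #     python tarfile.py -e archive.tar outdir    # extract into outdir
            #     python tarfile.py -c new.tar.gz src        # create, gzip by extension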
   2476     if args.test:
   2477         src = args.test
   2478         if is_tarfile(src):
   2479             with open(src, 'r') as tar:
   2480                 print(tar.getmembers(), file=sys.stderr)
   2482             if args.verbose:
   2483                 print('{!r} is a tar archive.'.format(src))
   2484         else:
   2485             parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
   2486 
   2487     elif args.list:
   2488         src = args.list
   2489         if is_tarfile(src):
   2490             with TarFile.open(src, 'r:*') as tf:
   2491                 tf.list(verbose=args.verbose)
   2492         else:
   2493             parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
   2494 
   2495     elif args.extract:
   2496         if len(args.extract) == 1:
   2497             src = args.extract[0]
   2498             curdir = os.curdir
   2499         elif len(args.extract) == 2:
   2500             src, curdir = args.extract
   2501         else:
   2502             parser.exit(1, parser.format_help())
   2503 
   2504         if is_tarfile(src):
   2505             with TarFile.open(src, 'r:*') as tf:
   2506                 tf.extractall(path=curdir)
   2507             if args.verbose:
   2508                 if curdir == '.':
   2509                     msg = '{!r} was extracted.'.format(src)
   2510                 else:
   2511                     msg = ('{!r} was extracted '
   2512                            'into the {!r} directory.').format(src, curdir)
   2513                 print(msg)
   2514         else:
   2515             parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
   2516 
   2517     elif args.create:
   2518         tar_name = args.create.pop(0)
   2519         _, ext = os.path.splitext(tar_name)
   2520         compressions = {
   2521             # gz
   2522             '.gz': 'gz',
   2523             '.tgz': 'gz',
   2524             # xz
   2525             '.xz': 'xz',
   2526             '.txz': 'xz',
   2527             # bz2
   2528             '.bz2': 'bz2',
   2529             '.tbz': 'bz2',
   2530             '.tbz2': 'bz2',
   2531             '.tb2': 'bz2',
   2532         }
   2533         tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
   2534         tar_files = args.create
   2535 
   2536         with TarFile.open(tar_name, tar_mode) as tf:
   2537             for file_name in tar_files:
   2538                 tf.add(file_name)
   2539 
   2540         if args.verbose:
   2541             print('{!r} was created.'.format(tar_name))
   2542 
   2543     else:
   2544         parser.exit(1, parser.format_help())
   2545 
   2546 if __name__ == '__main__':
   2547     main()
   2548