Home | History | Annotate | Download | only in Lib
      1 #!/usr/bin/env python

      2 # -*- coding: iso-8859-1 -*-

      3 #-------------------------------------------------------------------

      4 # tarfile.py

      5 #-------------------------------------------------------------------

      6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>

      7 # All rights reserved.

      8 #

      9 # Permission  is  hereby granted,  free  of charge,  to  any person

     10 # obtaining a  copy of  this software  and associated documentation

     11 # files  (the  "Software"),  to   deal  in  the  Software   without

     12 # restriction,  including  without limitation  the  rights to  use,

     13 # copy, modify, merge, publish, distribute, sublicense, and/or sell

     14 # copies  of  the  Software,  and to  permit  persons  to  whom the

     15 # Software  is  furnished  to  do  so,  subject  to  the  following

     16 # conditions:

     17 #

     18 # The above copyright  notice and this  permission notice shall  be

     19 # included in all copies or substantial portions of the Software.

     20 #

     21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,

     22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES

     23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND

     24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT

     25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,

     26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING

     27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR

     28 # OTHER DEALINGS IN THE SOFTWARE.

     29 #

     30 """Read from and write to tar format archives.
     31 """
     32 
     33 __version__ = "$Revision$"
     34 # $Source$

     35 
     36 version     = "0.9.0"
     37 __author__  = "Lars Gustbel (lars (at] gustaebel.de)"
     38 __date__    = "$Date$"
     39 __cvsid__   = "$Id$"
     40 __credits__ = "Gustavo Niemeyer, Niels Gustbel, Richard Townsend."
     41 
     42 #---------

     43 # Imports

     44 #---------

     45 import sys
     46 import os
     47 import shutil
     48 import stat
     49 import errno
     50 import time
     51 import struct
     52 import copy
     53 import re
     54 import operator
     55 
     56 try:
     57     import grp, pwd
     58 except ImportError:
     59     grp = pwd = None
     60 
     61 # from tarfile import *

     62 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
     63 
     64 #---------------------------------------------------------

     65 # tar constants

     66 #---------------------------------------------------------

     67 NUL = "\0"                      # the null character

     68 BLOCKSIZE = 512                 # length of processing blocks

     69 RECORDSIZE = BLOCKSIZE * 20     # length of records

     70 GNU_MAGIC = "ustar  \0"         # magic gnu tar string

     71 POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

     72 
     73 LENGTH_NAME = 100               # maximum length of a filename

     74 LENGTH_LINK = 100               # maximum length of a linkname

     75 LENGTH_PREFIX = 155             # maximum length of the prefix field

     76 
     77 REGTYPE = "0"                   # regular file

     78 AREGTYPE = "\0"                 # regular file

     79 LNKTYPE = "1"                   # link (inside tarfile)

     80 SYMTYPE = "2"                   # symbolic link

     81 CHRTYPE = "3"                   # character special device

     82 BLKTYPE = "4"                   # block special device

     83 DIRTYPE = "5"                   # directory

     84 FIFOTYPE = "6"                  # fifo special device

     85 CONTTYPE = "7"                  # contiguous file

     86 
     87 GNUTYPE_LONGNAME = "L"          # GNU tar longname

     88 GNUTYPE_LONGLINK = "K"          # GNU tar longlink

     89 GNUTYPE_SPARSE = "S"            # GNU tar sparse file

     90 
     91 XHDTYPE = "x"                   # POSIX.1-2001 extended header

     92 XGLTYPE = "g"                   # POSIX.1-2001 global header

     93 SOLARIS_XHDTYPE = "X"           # Solaris extended header

     94 
     95 USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format

     96 GNU_FORMAT = 1                  # GNU tar format

     97 PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format

     98 DEFAULT_FORMAT = GNU_FORMAT
     99 
    100 #---------------------------------------------------------

    101 # tarfile constants

    102 #---------------------------------------------------------

    103 # File types that tarfile supports:

    104 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
    105                    SYMTYPE, DIRTYPE, FIFOTYPE,
    106                    CONTTYPE, CHRTYPE, BLKTYPE,
    107                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    108                    GNUTYPE_SPARSE)
    109 
    110 # File types that will be treated as a regular file.

    111 REGULAR_TYPES = (REGTYPE, AREGTYPE,
    112                  CONTTYPE, GNUTYPE_SPARSE)
    113 
    114 # File types that are part of the GNU tar format.

    115 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    116              GNUTYPE_SPARSE)
    117 
    118 # Fields from a pax header that override a TarInfo attribute.

    119 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
    120               "uid", "gid", "uname", "gname")
    121 
    122 # Fields in a pax header that are numbers, all other fields

    123 # are treated as strings.

    124 PAX_NUMBER_FIELDS = {
    125     "atime": float,
    126     "ctime": float,
    127     "mtime": float,
    128     "uid": int,
    129     "gid": int,
    130     "size": int
    131 }
    132 
    133 #---------------------------------------------------------

    134 # Bits used in the mode field, values in octal.

    135 #---------------------------------------------------------

    136 S_IFLNK = 0120000        # symbolic link

    137 S_IFREG = 0100000        # regular file

    138 S_IFBLK = 0060000        # block device

    139 S_IFDIR = 0040000        # directory

    140 S_IFCHR = 0020000        # character device

    141 S_IFIFO = 0010000        # fifo

    142 
    143 TSUID   = 04000          # set UID on execution

    144 TSGID   = 02000          # set GID on execution

    145 TSVTX   = 01000          # reserved

    146 
    147 TUREAD  = 0400           # read by owner

    148 TUWRITE = 0200           # write by owner

    149 TUEXEC  = 0100           # execute/search by owner

    150 TGREAD  = 0040           # read by group

    151 TGWRITE = 0020           # write by group

    152 TGEXEC  = 0010           # execute/search by group

    153 TOREAD  = 0004           # read by other

    154 TOWRITE = 0002           # write by other

    155 TOEXEC  = 0001           # execute/search by other

    156 
    157 #---------------------------------------------------------

    158 # initialization

    159 #---------------------------------------------------------

    160 ENCODING = sys.getfilesystemencoding()
    161 if ENCODING is None:
    162     ENCODING = sys.getdefaultencoding()
    163 
    164 #---------------------------------------------------------

    165 # Some useful functions

    166 #---------------------------------------------------------

    167 
    168 def stn(s, length):
    169     """Convert a python string to a null-terminated string buffer.
    170     """
    171     return s[:length] + (length - len(s)) * NUL
    172 
    173 def nts(s):
    174     """Convert a null-terminated string field to a python string.
    175     """
    176     # Use the string up to the first null char.

    177     p = s.find("\0")
    178     if p == -1:
    179         return s
    180     return s[:p]
    181 
    182 def nti(s):
    183     """Convert a number field to a python number.
    184     """
    185     # There are two possible encodings for a number field, see

    186     # itn() below.

    187     if s[0] != chr(0200):
    188         try:
    189             n = int(nts(s) or "0", 8)
    190         except ValueError:
    191             raise InvalidHeaderError("invalid header")
    192     else:
    193         n = 0L
    194         for i in xrange(len(s) - 1):
    195             n <<= 8
    196             n += ord(s[i + 1])
    197     return n
    198 
    199 def itn(n, digits=8, format=DEFAULT_FORMAT):
    200     """Convert a python number to a number field.
    201     """
    202     # POSIX 1003.1-1988 requires numbers to be encoded as a string of

    203     # octal digits followed by a null-byte, this allows values up to

    204     # (8**(digits-1))-1. GNU tar allows storing numbers greater than

    205     # that if necessary. A leading 0200 byte indicates this particular

    206     # encoding, the following digits-1 bytes are a big-endian

    207     # representation. This allows values up to (256**(digits-1))-1.

    208     if 0 <= n < 8 ** (digits - 1):
    209         s = "%0*o" % (digits - 1, n) + NUL
    210     else:
    211         if format != GNU_FORMAT or n >= 256 ** (digits - 1):
    212             raise ValueError("overflow in number field")
    213 
    214         if n < 0:
    215             # XXX We mimic GNU tar's behaviour with negative numbers,

    216             # this could raise OverflowError.

    217             n = struct.unpack("L", struct.pack("l", n))[0]
    218 
    219         s = ""
    220         for i in xrange(digits - 1):
    221             s = chr(n & 0377) + s
    222             n >>= 8
    223         s = chr(0200) + s
    224     return s
    225 
    226 def uts(s, encoding, errors):
    227     """Convert a unicode object to a string.
    228     """
    229     if errors == "utf-8":
    230         # An extra error handler similar to the -o invalid=UTF-8 option

    231         # in POSIX.1-2001. Replace untranslatable characters with their

    232         # UTF-8 representation.

    233         try:
    234             return s.encode(encoding, "strict")
    235         except UnicodeEncodeError:
    236             x = []
    237             for c in s:
    238                 try:
    239                     x.append(c.encode(encoding, "strict"))
    240                 except UnicodeEncodeError:
    241                     x.append(c.encode("utf8"))
    242             return "".join(x)
    243     else:
    244         return s.encode(encoding, errors)
    245 
    246 def calc_chksums(buf):
    247     """Calculate the checksum for a member's header by summing up all
    248        characters except for the chksum field which is treated as if
    249        it was filled with spaces. According to the GNU tar sources,
    250        some tars (Sun and NeXT) calculate chksum with signed char,
    251        which will be different if there are chars in the buffer with
    252        the high bit set. So we calculate two checksums, unsigned and
    253        signed.
    254     """
    255     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
    256     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
    257     return unsigned_chksum, signed_chksum
    258 
    259 def copyfileobj(src, dst, length=None):
    260     """Copy length bytes from fileobj src to fileobj dst.
    261        If length is None, copy the entire content.
    262     """
    263     if length == 0:
    264         return
    265     if length is None:
    266         shutil.copyfileobj(src, dst)
    267         return
    268 
    269     BUFSIZE = 16 * 1024
    270     blocks, remainder = divmod(length, BUFSIZE)
    271     for b in xrange(blocks):
    272         buf = src.read(BUFSIZE)
    273         if len(buf) < BUFSIZE:
    274             raise IOError("end of file reached")
    275         dst.write(buf)
    276 
    277     if remainder != 0:
    278         buf = src.read(remainder)
    279         if len(buf) < remainder:
    280             raise IOError("end of file reached")
    281         dst.write(buf)
    282     return
    283 
    284 filemode_table = (
    285     ((S_IFLNK,      "l"),
    286      (S_IFREG,      "-"),
    287      (S_IFBLK,      "b"),
    288      (S_IFDIR,      "d"),
    289      (S_IFCHR,      "c"),
    290      (S_IFIFO,      "p")),
    291 
    292     ((TUREAD,       "r"),),
    293     ((TUWRITE,      "w"),),
    294     ((TUEXEC|TSUID, "s"),
    295      (TSUID,        "S"),
    296      (TUEXEC,       "x")),
    297 
    298     ((TGREAD,       "r"),),
    299     ((TGWRITE,      "w"),),
    300     ((TGEXEC|TSGID, "s"),
    301      (TSGID,        "S"),
    302      (TGEXEC,       "x")),
    303 
    304     ((TOREAD,       "r"),),
    305     ((TOWRITE,      "w"),),
    306     ((TOEXEC|TSVTX, "t"),
    307      (TSVTX,        "T"),
    308      (TOEXEC,       "x"))
    309 )
    310 
    311 def filemode(mode):
    312     """Convert a file's mode to a string of the form
    313        -rwxrwxrwx.
    314        Used by TarFile.list()
    315     """
    316     perm = []
    317     for table in filemode_table:
    318         for bit, char in table:
    319             if mode & bit == bit:
    320                 perm.append(char)
    321                 break
    322         else:
    323             perm.append("-")
    324     return "".join(perm)
    325 
    326 class TarError(Exception):
    327     """Base exception."""
    328     pass
    329 class ExtractError(TarError):
    330     """General exception for extract errors."""
    331     pass
    332 class ReadError(TarError):
    333     """Exception for unreadble tar archives."""
    334     pass
    335 class CompressionError(TarError):
    336     """Exception for unavailable compression methods."""
    337     pass
    338 class StreamError(TarError):
    339     """Exception for unsupported operations on stream-like TarFiles."""
    340     pass
    341 class HeaderError(TarError):
    342     """Base exception for header errors."""
    343     pass
    344 class EmptyHeaderError(HeaderError):
    345     """Exception for empty headers."""
    346     pass
    347 class TruncatedHeaderError(HeaderError):
    348     """Exception for truncated headers."""
    349     pass
    350 class EOFHeaderError(HeaderError):
    351     """Exception for end of file headers."""
    352     pass
    353 class InvalidHeaderError(HeaderError):
    354     """Exception for invalid headers."""
    355     pass
    356 class SubsequentHeaderError(HeaderError):
    357     """Exception for missing and invalid extended headers."""
    358     pass
    359 
    360 #---------------------------

    361 # internal stream interface

    362 #---------------------------

    363 class _LowLevelFile:
    364     """Low-level file object. Supports reading and writing.
    365        It is used instead of a regular file object for streaming
    366        access.
    367     """
    368 
    369     def __init__(self, name, mode):
    370         mode = {
    371             "r": os.O_RDONLY,
    372             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
    373         }[mode]
    374         if hasattr(os, "O_BINARY"):
    375             mode |= os.O_BINARY
    376         self.fd = os.open(name, mode, 0666)
    377 
    378     def close(self):
    379         os.close(self.fd)
    380 
    381     def read(self, size):
    382         return os.read(self.fd, size)
    383 
    384     def write(self, s):
    385         os.write(self.fd, s)
    386 
    387 class _Stream:
    388     """Class that serves as an adapter between TarFile and
    389        a stream-like object.  The stream-like object only
    390        needs to have a read() or write() method and is accessed
    391        blockwise.  Use of gzip or bzip2 compression is possible.
    392        A stream-like object could be for example: sys.stdin,
    393        sys.stdout, a socket, a tape device etc.
    394 
    395        _Stream is intended to be used only internally.
    396     """
    397 
    398     def __init__(self, name, mode, comptype, fileobj, bufsize):
    399         """Construct a _Stream object.
    400         """
    401         self._extfileobj = True
    402         if fileobj is None:
    403             fileobj = _LowLevelFile(name, mode)
    404             self._extfileobj = False
    405 
    406         if comptype == '*':
    407             # Enable transparent compression detection for the

    408             # stream interface

    409             fileobj = _StreamProxy(fileobj)
    410             comptype = fileobj.getcomptype()
    411 
    412         self.name     = name or ""
    413         self.mode     = mode
    414         self.comptype = comptype
    415         self.fileobj  = fileobj
    416         self.bufsize  = bufsize
    417         self.buf      = ""
    418         self.pos      = 0L
    419         self.closed   = False
    420 
    421         if comptype == "gz":
    422             try:
    423                 import zlib
    424             except ImportError:
    425                 raise CompressionError("zlib module is not available")
    426             self.zlib = zlib
    427             self.crc = zlib.crc32("") & 0xffffffffL
    428             if mode == "r":
    429                 self._init_read_gz()
    430             else:
    431                 self._init_write_gz()
    432 
    433         if comptype == "bz2":
    434             try:
    435                 import bz2
    436             except ImportError:
    437                 raise CompressionError("bz2 module is not available")
    438             if mode == "r":
    439                 self.dbuf = ""
    440                 self.cmp = bz2.BZ2Decompressor()
    441             else:
    442                 self.cmp = bz2.BZ2Compressor()
    443 
    444     def __del__(self):
    445         if hasattr(self, "closed") and not self.closed:
    446             self.close()
    447 
    448     def _init_write_gz(self):
    449         """Initialize for writing with gzip compression.
    450         """
    451         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
    452                                             -self.zlib.MAX_WBITS,
    453                                             self.zlib.DEF_MEM_LEVEL,
    454                                             0)
    455         timestamp = struct.pack("<L", long(time.time()))
    456         self.__write("\037\213\010\010%s\002\377" % timestamp)
    457         if self.name.endswith(".gz"):
    458             self.name = self.name[:-3]
    459         self.__write(self.name + NUL)
    460 
    461     def write(self, s):
    462         """Write string s to the stream.
    463         """
    464         if self.comptype == "gz":
    465             self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
    466         self.pos += len(s)
    467         if self.comptype != "tar":
    468             s = self.cmp.compress(s)
    469         self.__write(s)
    470 
    471     def __write(self, s):
    472         """Write string s to the stream if a whole new block
    473            is ready to be written.
    474         """
    475         self.buf += s
    476         while len(self.buf) > self.bufsize:
    477             self.fileobj.write(self.buf[:self.bufsize])
    478             self.buf = self.buf[self.bufsize:]
    479 
    480     def close(self):
    481         """Close the _Stream object. No operation should be
    482            done on it afterwards.
    483         """
    484         if self.closed:
    485             return
    486 
    487         if self.mode == "w" and self.comptype != "tar":
    488             self.buf += self.cmp.flush()
    489 
    490         if self.mode == "w" and self.buf:
    491             self.fileobj.write(self.buf)
    492             self.buf = ""
    493             if self.comptype == "gz":
    494                 # The native zlib crc is an unsigned 32-bit integer, but

    495                 # the Python wrapper implicitly casts that to a signed C

    496                 # long.  So, on a 32-bit box self.crc may "look negative",

    497                 # while the same crc on a 64-bit box may "look positive".

    498                 # To avoid irksome warnings from the `struct` module, force

    499                 # it to look positive on all boxes.

    500                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
    501                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
    502 
    503         if not self._extfileobj:
    504             self.fileobj.close()
    505 
    506         self.closed = True
    507 
    508     def _init_read_gz(self):
    509         """Initialize for reading a gzip compressed fileobj.
    510         """
    511         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
    512         self.dbuf = ""
    513 
    514         # taken from gzip.GzipFile with some alterations

    515         if self.__read(2) != "\037\213":
    516             raise ReadError("not a gzip file")
    517         if self.__read(1) != "\010":
    518             raise CompressionError("unsupported compression method")
    519 
    520         flag = ord(self.__read(1))
    521         self.__read(6)
    522 
    523         if flag & 4:
    524             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
    525             self.read(xlen)
    526         if flag & 8:
    527             while True:
    528                 s = self.__read(1)
    529                 if not s or s == NUL:
    530                     break
    531         if flag & 16:
    532             while True:
    533                 s = self.__read(1)
    534                 if not s or s == NUL:
    535                     break
    536         if flag & 2:
    537             self.__read(2)
    538 
    539     def tell(self):
    540         """Return the stream's file pointer position.
    541         """
    542         return self.pos
    543 
    544     def seek(self, pos=0):
    545         """Set the stream's file pointer to pos. Negative seeking
    546            is forbidden.
    547         """
    548         if pos - self.pos >= 0:
    549             blocks, remainder = divmod(pos - self.pos, self.bufsize)
    550             for i in xrange(blocks):
    551                 self.read(self.bufsize)
    552             self.read(remainder)
    553         else:
    554             raise StreamError("seeking backwards is not allowed")
    555         return self.pos
    556 
    557     def read(self, size=None):
    558         """Return the next size number of bytes from the stream.
    559            If size is not defined, return all bytes of the stream
    560            up to EOF.
    561         """
    562         if size is None:
    563             t = []
    564             while True:
    565                 buf = self._read(self.bufsize)
    566                 if not buf:
    567                     break
    568                 t.append(buf)
    569             buf = "".join(t)
    570         else:
    571             buf = self._read(size)
    572         self.pos += len(buf)
    573         return buf
    574 
    575     def _read(self, size):
    576         """Return size bytes from the stream.
    577         """
    578         if self.comptype == "tar":
    579             return self.__read(size)
    580 
    581         c = len(self.dbuf)
    582         t = [self.dbuf]
    583         while c < size:
    584             buf = self.__read(self.bufsize)
    585             if not buf:
    586                 break
    587             try:
    588                 buf = self.cmp.decompress(buf)
    589             except IOError:
    590                 raise ReadError("invalid compressed data")
    591             t.append(buf)
    592             c += len(buf)
    593         t = "".join(t)
    594         self.dbuf = t[size:]
    595         return t[:size]
    596 
    597     def __read(self, size):
    598         """Return size bytes from stream. If internal buffer is empty,
    599            read another block from the stream.
    600         """
    601         c = len(self.buf)
    602         t = [self.buf]
    603         while c < size:
    604             buf = self.fileobj.read(self.bufsize)
    605             if not buf:
    606                 break
    607             t.append(buf)
    608             c += len(buf)
    609         t = "".join(t)
    610         self.buf = t[size:]
    611         return t[:size]
    612 # class _Stream

    613 
    614 class _StreamProxy(object):
    615     """Small proxy class that enables transparent compression
    616        detection for the Stream interface (mode 'r|*').
    617     """
    618 
    619     def __init__(self, fileobj):
    620         self.fileobj = fileobj
    621         self.buf = self.fileobj.read(BLOCKSIZE)
    622 
    623     def read(self, size):
    624         self.read = self.fileobj.read
    625         return self.buf
    626 
    627     def getcomptype(self):
    628         if self.buf.startswith("\037\213\010"):
    629             return "gz"
    630         if self.buf.startswith("BZh91"):
    631             return "bz2"
    632         return "tar"
    633 
    634     def close(self):
    635         self.fileobj.close()
    636 # class StreamProxy

    637 
    638 class _BZ2Proxy(object):
    639     """Small proxy class that enables external file object
    640        support for "r:bz2" and "w:bz2" modes. This is actually
    641        a workaround for a limitation in bz2 module's BZ2File
    642        class which (unlike gzip.GzipFile) has no support for
    643        a file object argument.
    644     """
    645 
    646     blocksize = 16 * 1024
    647 
    648     def __init__(self, fileobj, mode):
    649         self.fileobj = fileobj
    650         self.mode = mode
    651         self.name = getattr(self.fileobj, "name", None)
    652         self.init()
    653 
    654     def init(self):
    655         import bz2
    656         self.pos = 0
    657         if self.mode == "r":
    658             self.bz2obj = bz2.BZ2Decompressor()
    659             self.fileobj.seek(0)
    660             self.buf = ""
    661         else:
    662             self.bz2obj = bz2.BZ2Compressor()
    663 
    664     def read(self, size):
    665         b = [self.buf]
    666         x = len(self.buf)
    667         while x < size:
    668             raw = self.fileobj.read(self.blocksize)
    669             if not raw:
    670                 break
    671             data = self.bz2obj.decompress(raw)
    672             b.append(data)
    673             x += len(data)
    674         self.buf = "".join(b)
    675 
    676         buf = self.buf[:size]
    677         self.buf = self.buf[size:]
    678         self.pos += len(buf)
    679         return buf
    680 
    681     def seek(self, pos):
    682         if pos < self.pos:
    683             self.init()
    684         self.read(pos - self.pos)
    685 
    686     def tell(self):
    687         return self.pos
    688 
    689     def write(self, data):
    690         self.pos += len(data)
    691         raw = self.bz2obj.compress(data)
    692         self.fileobj.write(raw)
    693 
    694     def close(self):
    695         if self.mode == "w":
    696             raw = self.bz2obj.flush()
    697             self.fileobj.write(raw)
    698 # class _BZ2Proxy

    699 
    700 #------------------------

    701 # Extraction file object

    702 #------------------------

    703 class _FileInFile(object):
    704     """A thin wrapper around an existing file object that
    705        provides a part of its data as an individual file
    706        object.
    707     """
    708 
    709     def __init__(self, fileobj, offset, size, sparse=None):
    710         self.fileobj = fileobj
    711         self.offset = offset
    712         self.size = size
    713         self.sparse = sparse
    714         self.position = 0
    715 
    716     def tell(self):
    717         """Return the current file position.
    718         """
    719         return self.position
    720 
    721     def seek(self, position):
    722         """Seek to a position in the file.
    723         """
    724         self.position = position
    725 
    726     def read(self, size=None):
    727         """Read data from the file.
    728         """
    729         if size is None:
    730             size = self.size - self.position
    731         else:
    732             size = min(size, self.size - self.position)
    733 
    734         if self.sparse is None:
    735             return self.readnormal(size)
    736         else:
    737             return self.readsparse(size)
    738 
    739     def readnormal(self, size):
    740         """Read operation for regular files.
    741         """
    742         self.fileobj.seek(self.offset + self.position)
    743         self.position += size
    744         return self.fileobj.read(size)
    745 
    746     def readsparse(self, size):
    747         """Read operation for sparse files.
    748         """
    749         data = []
    750         while size > 0:
    751             buf = self.readsparsesection(size)
    752             if not buf:
    753                 break
    754             size -= len(buf)
    755             data.append(buf)
    756         return "".join(data)
    757 
    758     def readsparsesection(self, size):
    759         """Read a single section of a sparse file.
    760         """
    761         section = self.sparse.find(self.position)
    762 
    763         if section is None:
    764             return ""
    765 
    766         size = min(size, section.offset + section.size - self.position)
    767 
    768         if isinstance(section, _data):
    769             realpos = section.realpos + self.position - section.offset
    770             self.fileobj.seek(self.offset + realpos)
    771             self.position += size
    772             return self.fileobj.read(size)
    773         else:
    774             self.position += size
    775             return NUL * size
    776 #class _FileInFile

    777 
    778 
    779 class ExFileObject(object):
    780     """File-like object for reading an archive member.
    781        Is returned by TarFile.extractfile().
    782     """
    783     blocksize = 1024
    784 
    def __init__(self, tarfile, tarinfo):
        """Create a file object for the member tarinfo of tarfile.

           The member's data is accessed through a _FileInFile wrapper
           around the archive's file object.
        """
        archive = tarfile.fileobj
        start = tarinfo.offset_data
        length = tarinfo.size
        sparse = getattr(tarinfo, "sparse", None)
        self.fileobj = _FileInFile(archive, start, length, sparse)

        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        # Read position, and data already fetched from self.fileobj
        # but not handed out yet.
        self.position = 0
        self.buffer = ""
    797 
    798     def read(self, size=None):
    799         """Read at most size bytes from the file. If size is not
    800            present or None, read all data until EOF is reached.
    801         """
    802         if self.closed:
    803             raise ValueError("I/O operation on closed file")
    804 
    805         buf = ""
    806         if self.buffer:
    807             if size is None:
    808                 buf = self.buffer
    809                 self.buffer = ""
    810             else:
    811                 buf = self.buffer[:size]
    812                 self.buffer = self.buffer[size:]
    813 
    814         if size is None:
    815             buf += self.fileobj.read()
    816         else:
    817             buf += self.fileobj.read(size - len(buf))
    818 
    819         self.position += len(buf)
    820         return buf
    821 
    822     def readline(self, size=-1):
    823         """Read one entire line from the file. If size is present
    824            and non-negative, return a string with at most that
    825            size, which may be an incomplete line.
    826         """
    827         if self.closed:
    828             raise ValueError("I/O operation on closed file")
    829 
    830         if "\n" in self.buffer:
    831             pos = self.buffer.find("\n") + 1
    832         else:
    833             buffers = [self.buffer]
    834             while True:
    835                 buf = self.fileobj.read(self.blocksize)
    836                 buffers.append(buf)
    837                 if not buf or "\n" in buf:
    838                     self.buffer = "".join(buffers)
    839                     pos = self.buffer.find("\n") + 1
    840                     if pos == 0:
    841                         # no newline found.

    842                         pos = len(self.buffer)
    843                     break
    844 
    845         if size != -1:
    846             pos = min(size, pos)
    847 
    848         buf = self.buffer[:pos]
    849         self.buffer = self.buffer[pos:]
    850         self.position += len(buf)
    851         return buf
    852 
    853     def readlines(self):
    854         """Return a list with all remaining lines.
    855         """
    856         result = []
    857         while True:
    858             line = self.readline()
    859             if not line: break
    860             result.append(line)
    861         return result
    862 
    863     def tell(self):
    864         """Return the current file position.
    865         """
    866         if self.closed:
    867             raise ValueError("I/O operation on closed file")
    868 
    869         return self.position
    870 
    871     def seek(self, pos, whence=os.SEEK_SET):
    872         """Seek to a position in the file.
    873         """
    874         if self.closed:
    875             raise ValueError("I/O operation on closed file")
    876 
    877         if whence == os.SEEK_SET:
    878             self.position = min(max(pos, 0), self.size)
    879         elif whence == os.SEEK_CUR:
    880             if pos < 0:
    881                 self.position = max(self.position + pos, 0)
    882             else:
    883                 self.position = min(self.position + pos, self.size)
    884         elif whence == os.SEEK_END:
    885             self.position = max(min(self.size + pos, self.size), 0)
    886         else:
    887             raise ValueError("Invalid argument")
    888 
    889         self.buffer = ""
    890         self.fileobj.seek(self.position)
    891 
    892     def close(self):
    893         """Close the file object.
    894         """
    895         self.closed = True
    896 
    897     def __iter__(self):
    898         """Get an iterator over the file's lines.
    899         """
    900         while True:
    901             line = self.readline()
    902             if not line:
    903                 break
    904             yield line
    905 #class ExFileObject

    906 
    907 #------------------

    908 # Exported Classes

    909 #------------------

    910 class TarInfo(object):
    911     """Informational class which holds the details about an
    912        archive member given by a tar header block.
    913        TarInfo objects are returned by TarFile.getmember(),
    914        TarFile.getmembers() and TarFile.gettarinfo() and are
    915        usually created internally.
    916     """
    917 
    def __init__(self, name=""):
        """Create a TarInfo whose fields all hold default values.
           name, if given, becomes the member name.
        """
        # Values that are stored in the header block.
        self.name = name            # member name
        self.mode = 0o644           # file permissions
        self.uid = 0                # user id
        self.gid = 0                # group id
        self.size = 0               # file size
        self.mtime = 0              # modification time
        self.chksum = 0             # header checksum
        self.type = REGTYPE         # member type
        self.linkname = ""          # link name
        self.uname = ""             # user name
        self.gname = ""             # group name
        self.devmajor = 0           # device major number
        self.devminor = 0           # device minor number

        # Positions inside the archive file.
        self.offset = 0             # the tar header starts here
        self.offset_data = 0        # the file's data starts here

        # Supplemental pax header information.
        self.pax_headers = {}

    940 
    941     # In pax headers the "name" and "linkname" field are called

    942     # "path" and "linkpath".

    def _getpath(self):
        """Return the member name (the pax keyword "path")."""
        return self.name

    def _setpath(self, name):
        """Store name as the member name."""
        self.name = name

    path = property(fget=_getpath, fset=_setpath)
    948 
    def _getlinkpath(self):
        """Return the link name (the pax keyword "linkpath")."""
        return self.linkname

    def _setlinkpath(self, linkname):
        """Store linkname as the link name."""
        self.linkname = linkname

    linkpath = property(fget=_getlinkpath, fset=_setlinkpath)
    954 
    955     def __repr__(self):
    956         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
    957 
    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 07777, the name of a directory
           gets a trailing slash, and unicode text fields are encoded
           using encoding and the error handler errors.
        """
        info = dict(
            name=self.name,
            mode=self.mode & 0o7777,
            uid=self.uid,
            gid=self.gid,
            size=self.size,
            mtime=self.mtime,
            chksum=self.chksum,
            type=self.type,
            linkname=self.linkname,
            uname=self.uname,
            gname=self.gname,
            devmajor=self.devmajor,
            devminor=self.devminor,
        )

        is_directory = info["type"] == DIRTYPE
        if is_directory and not info["name"].endswith("/"):
            info["name"] += "/"

        # Only genuine unicode objects are encoded; everything else is
        # passed on unchanged.
        for key in ("name", "linkname", "uname", "gname"):
            field = info[key]
            if type(field) is unicode:
                info[key] = field.encode(encoding, errors)

        return info
    985 
    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           format selects the header layout and must be USTAR_FORMAT,
           GNU_FORMAT or PAX_FORMAT; any other value raises ValueError.
           encoding and errors are handed to get_info() and, for the
           pax format, to create_pax_header().
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        if format == GNU_FORMAT:
            return self.create_gnu_header(info)
        if format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        raise ValueError("invalid format")
    999 
   1000     def create_ustar_header(self, info):
   1001         """Return the object as a ustar header block.
   1002         """
   1003         info["magic"] = POSIX_MAGIC
   1004 
   1005         if len(info["linkname"]) > LENGTH_LINK:
   1006             raise ValueError("linkname is too long")
   1007 
   1008         if len(info["name"]) > LENGTH_NAME:
   1009             info["prefix"], info["name"] = self._posix_split_name(info["name"])
   1010 
   1011         return self._create_header(info, USTAR_FORMAT)
   1012 
   1013     def create_gnu_header(self, info):
   1014         """Return the object as a GNU header block sequence.
   1015         """
   1016         info["magic"] = GNU_MAGIC
   1017 
   1018         buf = ""
   1019         if len(info["linkname"]) > LENGTH_LINK:
   1020             buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
   1021 
   1022         if len(info["name"]) > LENGTH_NAME:
   1023             buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
   1024 
   1025         return buf + self._create_header(info, GNU_FORMAT)
   1026 
   1027     def create_pax_header(self, info, encoding, errors):
   1028         """Return the object as a ustar header block. If it cannot be
   1029            represented this way, prepend a pax extended header sequence
   1030            with supplement information.
   1031         """
   1032         info["magic"] = POSIX_MAGIC
   1033         pax_headers = self.pax_headers.copy()
   1034 
   1035         # Test string fields for values that exceed the field length or cannot

   1036         # be represented in ASCII encoding.

   1037         for name, hname, length in (
   1038                 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
   1039                 ("uname", "uname", 32), ("gname", "gname", 32)):
   1040 
   1041             if hname in pax_headers:
   1042                 # The pax header has priority.

   1043                 continue
   1044 
   1045             val = info[name].decode(encoding, errors)
   1046 
   1047             # Try to encode the string as ASCII.

   1048             try:
   1049                 val.encode("ascii")
   1050             except UnicodeEncodeError:
   1051                 pax_headers[hname] = val
   1052                 continue
   1053 
   1054             if len(info[name]) > length:
   1055                 pax_headers[hname] = val
   1056 
   1057         # Test number fields for values that exceed the field limit or values

   1058         # that like to be stored as float.

   1059         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
   1060             if name in pax_headers:
   1061                 # The pax header has priority. Avoid overflow.

   1062                 info[name] = 0
   1063                 continue
   1064 
   1065             val = info[name]
   1066             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
   1067                 pax_headers[name] = unicode(val)
   1068                 info[name] = 0
   1069 
   1070         # Create a pax extended header if necessary.

   1071         if pax_headers:
   1072             buf = self._create_pax_generic_header(pax_headers)
   1073         else:
   1074             buf = ""
   1075 
   1076         return buf + self._create_header(info, USTAR_FORMAT)
   1077 
   1078     @classmethod
   1079     def create_pax_global_header(cls, pax_headers):
   1080         """Return the object as a pax global header block sequence.
   1081         """
   1082         return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
   1083 
   1084     def _posix_split_name(self, name):
   1085         """Split a name longer than 100 chars into a prefix
   1086            and a name part.
   1087         """
   1088         prefix = name[:LENGTH_PREFIX + 1]
   1089         while prefix and prefix[-1] != "/":
   1090             prefix = prefix[:-1]
   1091 
   1092         name = name[len(prefix):]
   1093         prefix = prefix[:-1]
   1094 
   1095         if not prefix or len(name) > LENGTH_NAME:
   1096             raise ValueError("name is too long")
   1097         return prefix, name
   1098 
   1099     @staticmethod
   1100     def _create_header(info, format):
   1101         """Return a header block. info is a dictionary with file
   1102            information, format must be one of the *_FORMAT constants.
   1103         """
   1104         parts = [
   1105             stn(info.get("name", ""), 100),
   1106             itn(info.get("mode", 0) & 07777, 8, format),
   1107             itn(info.get("uid", 0), 8, format),
   1108             itn(info.get("gid", 0), 8, format),
   1109             itn(info.get("size", 0), 12, format),
   1110             itn(info.get("mtime", 0), 12, format),
   1111             "        ", # checksum field

   1112             info.get("type", REGTYPE),
   1113             stn(info.get("linkname", ""), 100),
   1114             stn(info.get("magic", POSIX_MAGIC), 8),
   1115             stn(info.get("uname", ""), 32),
   1116             stn(info.get("gname", ""), 32),
   1117             itn(info.get("devmajor", 0), 8, format),
   1118             itn(info.get("devminor", 0), 8, format),
   1119             stn(info.get("prefix", ""), 155)
   1120         ]
   1121 
   1122         buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
   1123         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
   1124         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
   1125         return buf
   1126 
   1127     @staticmethod
   1128     def _create_payload(payload):
   1129         """Return the string payload filled with zero bytes
   1130            up to the next 512 byte border.
   1131         """
   1132         blocks, remainder = divmod(len(payload), BLOCKSIZE)
   1133         if remainder > 0:
   1134             payload += (BLOCKSIZE - remainder) * NUL
   1135         return payload
   1136 
   1137     @classmethod
   1138     def _create_gnu_long_header(cls, name, type):
   1139         """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
   1140            for name.
   1141         """
   1142         name += NUL
   1143 
   1144         info = {}
   1145         info["name"] = "././@LongLink"
   1146         info["type"] = type
   1147         info["size"] = len(name)
   1148         info["magic"] = GNU_MAGIC
   1149 
   1150         # create extended header + name blocks.

   1151         return cls._create_header(info, USTAR_FORMAT) + \
   1152                 cls._create_payload(name)
   1153 
   1154     @classmethod
   1155     def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
   1156         """Return a POSIX.1-2001 extended or global header sequence
   1157            that contains a list of keyword, value pairs. The values
   1158            must be unicode objects.
   1159         """
   1160         records = []
   1161         for keyword, value in pax_headers.iteritems():
   1162             keyword = keyword.encode("utf8")
   1163             value = value.encode("utf8")
   1164             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'

   1165             n = p = 0
   1166             while True:
   1167                 n = l + len(str(p))
   1168                 if n == p:
   1169                     break
   1170                 p = n
   1171             records.append("%d %s=%s\n" % (p, keyword, value))
   1172         records = "".join(records)
   1173 
   1174         # We use a hardcoded "././@PaxHeader" name like star does

   1175         # instead of the one that POSIX recommends.

   1176         info = {}
   1177         info["name"] = "././@PaxHeader"
   1178         info["type"] = type
   1179         info["size"] = len(records)
   1180         info["magic"] = POSIX_MAGIC
   1181 
   1182         # Create pax header + record blocks.

   1183         return cls._create_header(info, USTAR_FORMAT) + \
   1184                 cls._create_payload(records)
   1185 
   1186     @classmethod
   1187     def frombuf(cls, buf):
   1188         """Construct a TarInfo object from a 512 byte string buffer.
   1189         """
   1190         if len(buf) == 0:
   1191             raise EmptyHeaderError("empty header")
   1192         if len(buf) != BLOCKSIZE:
   1193             raise TruncatedHeaderError("truncated header")
   1194         if buf.count(NUL) == BLOCKSIZE:
   1195             raise EOFHeaderError("end of file header")
   1196 
   1197         chksum = nti(buf[148:156])
   1198         if chksum not in calc_chksums(buf):
   1199             raise InvalidHeaderError("bad checksum")
   1200 
   1201         obj = cls()
   1202         obj.buf = buf
   1203         obj.name = nts(buf[0:100])
   1204         obj.mode = nti(buf[100:108])
   1205         obj.uid = nti(buf[108:116])
   1206         obj.gid = nti(buf[116:124])
   1207         obj.size = nti(buf[124:136])
   1208         obj.mtime = nti(buf[136:148])
   1209         obj.chksum = chksum
   1210         obj.type = buf[156:157]
   1211         obj.linkname = nts(buf[157:257])
   1212         obj.uname = nts(buf[265:297])
   1213         obj.gname = nts(buf[297:329])
   1214         obj.devmajor = nti(buf[329:337])
   1215         obj.devminor = nti(buf[337:345])
   1216         prefix = nts(buf[345:500])
   1217 
   1218         # Old V7 tar format represents a directory as a regular

   1219         # file with a trailing slash.

   1220         if obj.type == AREGTYPE and obj.name.endswith("/"):
   1221             obj.type = DIRTYPE
   1222 
   1223         # Remove redundant slashes from directories.

   1224         if obj.isdir():
   1225             obj.name = obj.name.rstrip("/")
   1226 
   1227         # Reconstruct a ustar longname.

   1228         if prefix and obj.type not in GNU_TYPES:
   1229             obj.name = prefix + "/" + obj.name
   1230         return obj
   1231 
   1232     @classmethod
   1233     def fromtarfile(cls, tarfile):
   1234         """Return the next TarInfo object from TarFile object
   1235            tarfile.
   1236         """
   1237         buf = tarfile.fileobj.read(BLOCKSIZE)
   1238         obj = cls.frombuf(buf)
   1239         obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
   1240         return obj._proc_member(tarfile)
   1241 
   1242     #--------------------------------------------------------------------------

   1243     # The following are methods that are called depending on the type of a

   1244     # member. The entry point is _proc_member() which can be overridden in a

   1245     # subclass to add custom _proc_*() methods. A _proc_*() method MUST

   1246     # implement the following

   1247     # operations:

   1248     # 1. Set self.offset_data to the position where the data blocks begin,

   1249     #    if there is data that follows.

   1250     # 2. Set tarfile.offset to the position where the next member's header will

   1251     #    begin.

   1252     # 3. Return self or another valid TarInfo object.

   1253     def _proc_member(self, tarfile):
   1254         """Choose the right processing method depending on
   1255            the type and call it.
   1256         """
   1257         if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
   1258             return self._proc_gnulong(tarfile)
   1259         elif self.type == GNUTYPE_SPARSE:
   1260             return self._proc_sparse(tarfile)
   1261         elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
   1262             return self._proc_pax(tarfile)
   1263         else:
   1264             return self._proc_builtin(tarfile)
   1265 
   1266     def _proc_builtin(self, tarfile):
   1267         """Process a builtin type or an unknown type which
   1268            will be treated as a regular file.
   1269         """
   1270         self.offset_data = tarfile.fileobj.tell()
   1271         offset = self.offset_data
   1272         if self.isreg() or self.type not in SUPPORTED_TYPES:
   1273             # Skip the following data blocks.

   1274             offset += self._block(self.size)
   1275         tarfile.offset = offset
   1276 
   1277         # Patch the TarInfo object with saved global

   1278         # header information.

   1279         self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
   1280 
   1281         return self
   1282 
   1283     def _proc_gnulong(self, tarfile):
   1284         """Process the blocks that hold a GNU longname
   1285            or longlink member.
   1286         """
   1287         buf = tarfile.fileobj.read(self._block(self.size))
   1288 
   1289         # Fetch the next header and process it.

   1290         try:
   1291             next = self.fromtarfile(tarfile)
   1292         except HeaderError:
   1293             raise SubsequentHeaderError("missing or bad subsequent header")
   1294 
   1295         # Patch the TarInfo object from the next header with

   1296         # the longname information.

   1297         next.offset = self.offset
   1298         if self.type == GNUTYPE_LONGNAME:
   1299             next.name = nts(buf)
   1300         elif self.type == GNUTYPE_LONGLINK:
   1301             next.linkname = nts(buf)
   1302 
   1303         return next
   1304 
   1305     def _proc_sparse(self, tarfile):
   1306         """Process a GNU sparse header plus extra headers.
   1307         """
   1308         buf = self.buf
   1309         sp = _ringbuffer()
   1310         pos = 386
   1311         lastpos = 0L
   1312         realpos = 0L
   1313         # There are 4 possible sparse structs in the

   1314         # first header.

   1315         for i in xrange(4):
   1316             try:
   1317                 offset = nti(buf[pos:pos + 12])
   1318                 numbytes = nti(buf[pos + 12:pos + 24])
   1319             except ValueError:
   1320                 break
   1321             if offset > lastpos:
   1322                 sp.append(_hole(lastpos, offset - lastpos))
   1323             sp.append(_data(offset, numbytes, realpos))
   1324             realpos += numbytes
   1325             lastpos = offset + numbytes
   1326             pos += 24
   1327 
   1328         isextended = ord(buf[482])
   1329         origsize = nti(buf[483:495])
   1330 
   1331         # If the isextended flag is given,

   1332         # there are extra headers to process.

   1333         while isextended == 1:
   1334             buf = tarfile.fileobj.read(BLOCKSIZE)
   1335             pos = 0
   1336             for i in xrange(21):
   1337                 try:
   1338                     offset = nti(buf[pos:pos + 12])
   1339                     numbytes = nti(buf[pos + 12:pos + 24])
   1340                 except ValueError:
   1341                     break
   1342                 if offset > lastpos:
   1343                     sp.append(_hole(lastpos, offset - lastpos))
   1344                 sp.append(_data(offset, numbytes, realpos))
   1345                 realpos += numbytes
   1346                 lastpos = offset + numbytes
   1347                 pos += 24
   1348             isextended = ord(buf[504])
   1349 
   1350         if lastpos < origsize:
   1351             sp.append(_hole(lastpos, origsize - lastpos))
   1352 
   1353         self.sparse = sp
   1354 
   1355         self.offset_data = tarfile.fileobj.tell()
   1356         tarfile.offset = self.offset_data + self._block(self.size)
   1357         self.size = origsize
   1358 
   1359         return self
   1360 
   1361     def _proc_pax(self, tarfile):
   1362         """Process an extended or global header as described in
   1363            POSIX.1-2001.
   1364         """
   1365         # Read the header information.

   1366         buf = tarfile.fileobj.read(self._block(self.size))
   1367 
   1368         # A pax header stores supplemental information for either

   1369         # the following file (extended) or all following files

   1370         # (global).

   1371         if self.type == XGLTYPE:
   1372             pax_headers = tarfile.pax_headers
   1373         else:
   1374             pax_headers = tarfile.pax_headers.copy()
   1375 
   1376         # Parse pax header information. A record looks like that:

   1377         # "%d %s=%s\n" % (length, keyword, value). length is the size

   1378         # of the complete record including the length field itself and

   1379         # the newline. keyword and value are both UTF-8 encoded strings.

   1380         regex = re.compile(r"(\d+) ([^=]+)=", re.U)
   1381         pos = 0
   1382         while True:
   1383             match = regex.match(buf, pos)
   1384             if not match:
   1385                 break
   1386 
   1387             length, keyword = match.groups()
   1388             length = int(length)
   1389             value = buf[match.end(2) + 1:match.start(1) + length - 1]
   1390 
   1391             keyword = keyword.decode("utf8")
   1392             value = value.decode("utf8")
   1393 
   1394             pax_headers[keyword] = value
   1395             pos += length
   1396 
   1397         # Fetch the next header.

   1398         try:
   1399             next = self.fromtarfile(tarfile)
   1400         except HeaderError:
   1401             raise SubsequentHeaderError("missing or bad subsequent header")
   1402 
   1403         if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
   1404             # Patch the TarInfo object with the extended header info.

   1405             next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
   1406             next.offset = self.offset
   1407 
   1408             if "size" in pax_headers:
   1409                 # If the extended header replaces the size field,

   1410                 # we need to recalculate the offset where the next

   1411                 # header starts.

   1412                 offset = next.offset_data
   1413                 if next.isreg() or next.type not in SUPPORTED_TYPES:
   1414                     offset += next._block(next.size)
   1415                 tarfile.offset = offset
   1416 
   1417         return next
   1418 
   1419     def _apply_pax_info(self, pax_headers, encoding, errors):
   1420         """Replace fields with supplemental information from a previous
   1421            pax extended or global header.
   1422         """
   1423         for keyword, value in pax_headers.iteritems():
   1424             if keyword not in PAX_FIELDS:
   1425                 continue
   1426 
   1427             if keyword == "path":
   1428                 value = value.rstrip("/")
   1429 
   1430             if keyword in PAX_NUMBER_FIELDS:
   1431                 try:
   1432                     value = PAX_NUMBER_FIELDS[keyword](value)
   1433                 except ValueError:
   1434                     value = 0
   1435             else:
   1436                 value = uts(value, encoding, errors)
   1437 
   1438             setattr(self, keyword, value)
   1439 
   1440         self.pax_headers = pax_headers.copy()
   1441 
   1442     def _block(self, count):
   1443         """Round up a byte count by BLOCKSIZE and return it,
   1444            e.g. _block(834) => 1024.
   1445         """
   1446         blocks, remainder = divmod(count, BLOCKSIZE)
   1447         if remainder:
   1448             blocks += 1
   1449         return blocks * BLOCKSIZE
   1450 
   1451     def isreg(self):
   1452         return self.type in REGULAR_TYPES
   1453     def isfile(self):
   1454         return self.isreg()
   1455     def isdir(self):
   1456         return self.type == DIRTYPE
   1457     def issym(self):
   1458         return self.type == SYMTYPE
   1459     def islnk(self):
   1460         return self.type == LNKTYPE
   1461     def ischr(self):
   1462         return self.type == CHRTYPE
   1463     def isblk(self):
   1464         return self.type == BLKTYPE
   1465     def isfifo(self):
   1466         return self.type == FIFOTYPE
   1467     def issparse(self):
   1468         return self.type == GNUTYPE_SPARSE
   1469     def isdev(self):
   1470         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
   1471 # class TarInfo

   1472 
   1473 class TarFile(object):
   1474     """The TarFile Class provides an interface to tar archives.
   1475     """
   1476 
   1477     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

   1478 
   1479     dereference = False         # If true, add content of linked file to the

   1480                                 # tar file, else the link.

   1481 
   1482     ignore_zeros = False        # If true, skips empty or invalid blocks and

   1483                                 # continues processing.

   1484 
   1485     errorlevel = 1              # If 0, fatal errors only appear in debug

   1486                                 # messages (if debug >= 0). If > 0, errors

   1487                                 # are passed to the caller as exceptions.

   1488 
   1489     format = DEFAULT_FORMAT     # The format to use when creating an archive.

   1490 
   1491     encoding = ENCODING         # Encoding for 8-bit character strings.

   1492 
   1493     errors = None               # Error handler for unicode conversion.

   1494 
   1495     tarinfo = TarInfo           # The default TarInfo class to use.

   1496 
   1497     fileobject = ExFileObject   # The default ExFileObject class to use.

   1498 
   1499     def __init__(self, name=None, mode="r", fileobj=None, format=None,
   1500             tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
   1501             errors=None, pax_headers=None, debug=None, errorlevel=None):
   1502         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
   1503            read from an existing archive, 'a' to append data to an existing
   1504            file or 'w' to create a new file overwriting an existing one. `mode'
   1505            defaults to 'r'.
   1506            If `fileobj' is given, it is used for reading or writing data. If it
   1507            can be determined, `mode' is overridden by `fileobj's mode.
   1508            `fileobj' is not closed, when TarFile is closed.
   1509         """
   1510         if len(mode) > 1 or mode not in "raw":
   1511             raise ValueError("mode must be 'r', 'a' or 'w'")
   1512         self.mode = mode
   1513         self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
   1514 
   1515         if not fileobj:
   1516             if self.mode == "a" and not os.path.exists(name):
   1517                 # Create nonexistent files in append mode.
   1518                 self.mode = "w"
   1519                 self._mode = "wb"
   1520             fileobj = bltn_open(name, self._mode)
   1521             self._extfileobj = False
   1522         else:
   1523             if name is None and hasattr(fileobj, "name"):
   1524                 name = fileobj.name
   1525             if hasattr(fileobj, "mode"):
   1526                 self._mode = fileobj.mode
   1527             self._extfileobj = True
   1528         self.name = os.path.abspath(name) if name else None
   1529         self.fileobj = fileobj
   1530 
   1531         # Init attributes.
   1532         if format is not None:
   1533             self.format = format
   1534         if tarinfo is not None:
   1535             self.tarinfo = tarinfo
   1536         if dereference is not None:
   1537             self.dereference = dereference
   1538         if ignore_zeros is not None:
   1539             self.ignore_zeros = ignore_zeros
   1540         if encoding is not None:
   1541             self.encoding = encoding
   1542 
   1543         if errors is not None:
   1544             self.errors = errors
   1545         elif mode == "r":
   1546             self.errors = "utf-8"
   1547         else:
   1548             self.errors = "strict"
   1549 
   1550         if pax_headers is not None and self.format == PAX_FORMAT:
   1551             self.pax_headers = pax_headers
   1552         else:
   1553             self.pax_headers = {}
   1554 
   1555         if debug is not None:
   1556             self.debug = debug
   1557         if errorlevel is not None:
   1558             self.errorlevel = errorlevel
   1559 
   1560         # Init datastructures.
   1561         self.closed = False
   1562         self.members = []       # list of members as TarInfo objects
   1563         self._loaded = False    # flag if all members have been read
   1564         self.offset = self.fileobj.tell()
   1565                                 # current position in the archive file
   1566         self.inodes = {}        # dictionary caching the inodes of
   1567                                 # archive members already added
   1568 
   1569         try:
   1570             if self.mode == "r":
   1571                 self.firstmember = None
   1572                 self.firstmember = self.next()
   1573 
   1574             if self.mode == "a":
   1575                 # Move to the end of the archive,
   1576                 # before the first empty block.
   1577                 while True:
   1578                     self.fileobj.seek(self.offset)
   1579                     try:
   1580                         tarinfo = self.tarinfo.fromtarfile(self)
   1581                         self.members.append(tarinfo)
   1582                     except EOFHeaderError:
   1583                         self.fileobj.seek(self.offset)
   1584                         break
   1585                     except HeaderError, e:
   1586                         raise ReadError(str(e))
   1587 
   1588             if self.mode in "aw":
   1589                 self._loaded = True
   1590 
   1591                 if self.pax_headers:
   1592                     buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
   1593                     self.fileobj.write(buf)
   1594                     self.offset += len(buf)
   1595         except:
   1596             if not self._extfileobj:
   1597                 self.fileobj.close()
   1598             self.closed = True
   1599             raise
   1600 
   1601     def _getposix(self):
   1602         return self.format == USTAR_FORMAT
   1603     def _setposix(self, value):
   1604         import warnings
   1605         warnings.warn("use the format attribute instead", DeprecationWarning,
   1606                       2)
   1607         if value:
   1608             self.format = USTAR_FORMAT
   1609         else:
   1610             self.format = GNU_FORMAT
   1611     posix = property(_getposix, _setposix)
   1612 
   1613     #--------------------------------------------------------------------------
   1614     # Below are the classmethods which act as alternate constructors to the
   1615     # TarFile class. The open() method is the only one that is needed for
   1616     # public use; it is the "super"-constructor and is able to select an
   1617     # adequate "sub"-constructor for a particular compression using the mapping
   1618     # from OPEN_METH.
   1619     #
   1620     # This concept allows one to subclass TarFile without losing the comfort of
   1621     # the super-constructor. A sub-constructor is registered and made available
   1622     # by adding it to the mapping in OPEN_METH.
   1623 
   1624     @classmethod
   1625     def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
   1626         """Open a tar archive for reading, writing or appending. Return
   1627            an appropriate TarFile class.
   1628 
   1629            mode:
   1630            'r' or 'r:*' open for reading with transparent compression
   1631            'r:'         open for reading exclusively uncompressed
   1632            'r:gz'       open for reading with gzip compression
   1633            'r:bz2'      open for reading with bzip2 compression
   1634            'a' or 'a:'  open for appending, creating the file if necessary
   1635            'w' or 'w:'  open for writing without compression
   1636            'w:gz'       open for writing with gzip compression
   1637            'w:bz2'      open for writing with bzip2 compression
   1638 
   1639            'r|*'        open a stream of tar blocks with transparent compression
   1640            'r|'         open an uncompressed stream of tar blocks for reading
   1641            'r|gz'       open a gzip compressed stream of tar blocks
   1642            'r|bz2'      open a bzip2 compressed stream of tar blocks
   1643            'w|'         open an uncompressed stream for writing
   1644            'w|gz'       open a gzip compressed stream for writing
   1645            'w|bz2'      open a bzip2 compressed stream for writing
   1646         """
   1647 
   1648         if not name and not fileobj:
   1649             raise ValueError("nothing to open")
   1650 
   1651         if mode in ("r", "r:*"):
   1652             # Find out which *open() is appropriate for opening the file.
   1653             for comptype in cls.OPEN_METH:
   1654                 func = getattr(cls, cls.OPEN_METH[comptype])
   1655                 if fileobj is not None:
   1656                     saved_pos = fileobj.tell()
   1657                 try:
   1658                     return func(name, "r", fileobj, **kwargs)
   1659                 except (ReadError, CompressionError), e:
   1660                     if fileobj is not None:
   1661                         fileobj.seek(saved_pos)
   1662                     continue
   1663             raise ReadError("file could not be opened successfully")
   1664 
   1665         elif ":" in mode:
   1666             filemode, comptype = mode.split(":", 1)
   1667             filemode = filemode or "r"
   1668             comptype = comptype or "tar"
   1669 
   1670             # Select the *open() function according to
   1671             # given compression.
   1672             if comptype in cls.OPEN_METH:
   1673                 func = getattr(cls, cls.OPEN_METH[comptype])
   1674             else:
   1675                 raise CompressionError("unknown compression type %r" % comptype)
   1676             return func(name, filemode, fileobj, **kwargs)
   1677 
   1678         elif "|" in mode:
   1679             filemode, comptype = mode.split("|", 1)
   1680             filemode = filemode or "r"
   1681             comptype = comptype or "tar"
   1682 
   1683             if filemode not in "rw":
   1684                 raise ValueError("mode must be 'r' or 'w'")
   1685 
   1686             t = cls(name, filemode,
   1687                     _Stream(name, filemode, comptype, fileobj, bufsize),
   1688                     **kwargs)
   1689             t._extfileobj = False
   1690             return t
   1691 
   1692         elif mode in "aw":
   1693             return cls.taropen(name, mode, fileobj, **kwargs)
   1694 
   1695         raise ValueError("undiscernible mode")
   1696 
   1697     @classmethod
   1698     def taropen(cls, name, mode="r", fileobj=None, **kwargs):
   1699         """Open uncompressed tar archive name for reading or writing.
   1700         """
   1701         if len(mode) > 1 or mode not in "raw":
   1702             raise ValueError("mode must be 'r', 'a' or 'w'")
   1703         return cls(name, mode, fileobj, **kwargs)
   1704 
   1705     @classmethod
   1706     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
   1707         """Open gzip compressed tar archive name for reading or writing.
   1708            Appending is not allowed.
   1709         """
   1710         if len(mode) > 1 or mode not in "rw":
   1711             raise ValueError("mode must be 'r' or 'w'")
   1712 
   1713         try:
   1714             import gzip
   1715             gzip.GzipFile
   1716         except (ImportError, AttributeError):
   1717             raise CompressionError("gzip module is not available")
   1718 
   1719         if fileobj is None:
   1720             fileobj = bltn_open(name, mode + "b")
   1721 
   1722         try:
   1723             t = cls.taropen(name, mode,
   1724                 gzip.GzipFile(name, mode, compresslevel, fileobj),
   1725                 **kwargs)
   1726         except IOError:
   1727             raise ReadError("not a gzip file")
   1728         t._extfileobj = False
   1729         return t
   1730 
   1731     @classmethod
   1732     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
   1733         """Open bzip2 compressed tar archive name for reading or writing.
   1734            Appending is not allowed.
   1735         """
   1736         if len(mode) > 1 or mode not in "rw":
   1737             raise ValueError("mode must be 'r' or 'w'.")
   1738 
   1739         try:
   1740             import bz2
   1741         except ImportError:
   1742             raise CompressionError("bz2 module is not available")
   1743 
   1744         if fileobj is not None:
   1745             fileobj = _BZ2Proxy(fileobj, mode)
   1746         else:
   1747             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
   1748 
   1749         try:
   1750             t = cls.taropen(name, mode, fileobj, **kwargs)
   1751         except (IOError, EOFError):
   1752             raise ReadError("not a bzip2 file")
   1753         t._extfileobj = False
   1754         return t
   1755 
    # All *open() methods are registered here.
    # Maps a compression type (the part of the mode string after ':' in
    # open(), with "tar" standing for no compression) to the name of the
    # classmethod that opens such an archive. open() looks the method up
    # by name with getattr(), so subclasses may override or add entries.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
   1762 
   1763     #--------------------------------------------------------------------------
   1764     # The public methods which TarFile provides:
   1765 
   1766     def close(self):
   1767         """Close the TarFile. In write-mode, two finishing zero blocks are
   1768            appended to the archive.
   1769         """
   1770         if self.closed:
   1771             return
   1772 
   1773         if self.mode in "aw":
   1774             self.fileobj.write(NUL * (BLOCKSIZE * 2))
   1775             self.offset += (BLOCKSIZE * 2)
   1776             # fill up the end with zero-blocks
   1777             # (like option -b20 for tar does)
   1778             blocks, remainder = divmod(self.offset, RECORDSIZE)
   1779             if remainder > 0:
   1780                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
   1781 
   1782         if not self._extfileobj:
   1783             self.fileobj.close()
   1784         self.closed = True
   1785 
   1786     def getmember(self, name):
   1787         """Return a TarInfo object for member `name'. If `name' can not be
   1788            found in the archive, KeyError is raised. If a member occurs more
   1789            than once in the archive, its last occurrence is assumed to be the
   1790            most up-to-date version.
   1791         """
   1792         tarinfo = self._getmember(name)
   1793         if tarinfo is None:
   1794             raise KeyError("filename %r not found" % name)
   1795         return tarinfo
   1796 
   1797     def getmembers(self):
   1798         """Return the members of the archive as a list of TarInfo objects. The
   1799            list has the same order as the members in the archive.
   1800         """
   1801         self._check()
   1802         if not self._loaded:    # if we want to obtain a list of

   1803             self._load()        # all members, we first have to

   1804                                 # scan the whole archive.

   1805         return self.members
   1806 
   1807     def getnames(self):
   1808         """Return the members of the archive as a list of their names. It has
   1809            the same order as the list returned by getmembers().
   1810         """
   1811         return [tarinfo.name for tarinfo in self.getmembers()]
   1812 
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object for either the file `name' or the file
           object `fileobj' (using os.fstat on its file descriptor). You can
           modify some of the TarInfo's attributes before you add it using
           addfile(). If given, `arcname' specifies an alternative name for the
           file in the archive.

           Returns the filled-in TarInfo object, or None if the file is
           none of: regular file, directory, fifo, symbolic link,
           character device or block device.
        """
        self._check("aw")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if fileobj is None:
            if hasattr(os, "lstat") and not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        # Map the stat mode to a tar member type.
        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            # (inode, device) identifies the file for hardlink detection.
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Any other kind of file is not represented.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        # Only members stored as regular files get a size; all other
        # types, including hardlinks, are recorded with size zero.
        if type == REGTYPE:
            tarinfo.size = statres.st_size
        else:
            tarinfo.size = 0L
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        # Symbolic owner and group names are filled in only if the
        # pwd/grp modules are usable and know the numeric id; otherwise
        # the TarInfo's existing values are kept.
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        # Device numbers are recorded only where the platform can split
        # st_rdev into major and minor parts.
        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
   1910 
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced.
        """
        self._check()

        # Each member is printed on one line: the trailing commas keep the
        # print statements on the same line, the bare print ends it.
        for tarinfo in self:
            if verbose:
                # Permission string, then owner/group (names are
                # preferred, numeric ids are the fallback).
                print filemode(tarinfo.mode),
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                # Devices show "major,minor" in place of a size.
                if tarinfo.ischr() or tarinfo.isblk():
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                # Modification time in local time, YYYY-MM-DD HH:MM:SS.
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            # Directories are marked with a trailing slash.
            print tarinfo.name + ("/" if tarinfo.isdir() else ""),

            if verbose:
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            print
   1939 
   1940     def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
   1941         """Add the file `name' to the archive. `name' may be any type of file
   1942            (directory, fifo, symbolic link, etc.). If given, `arcname'
   1943            specifies an alternative name for the file in the archive.
   1944            Directories are added recursively by default. This can be avoided by
   1945            setting `recursive' to False. `exclude' is a function that should
   1946            return True for each filename to be excluded. `filter' is a function
   1947            that expects a TarInfo object argument and returns the changed
   1948            TarInfo object, if it returns None the TarInfo object will be
   1949            excluded from the archive.
   1950         """
   1951         self._check("aw")
   1952 
   1953         if arcname is None:
   1954             arcname = name
   1955 
   1956         # Exclude pathnames.
   1957         if exclude is not None:
   1958             import warnings
   1959             warnings.warn("use the filter argument instead",
   1960                     DeprecationWarning, 2)
   1961             if exclude(name):
   1962                 self._dbg(2, "tarfile: Excluded %r" % name)
   1963                 return
   1964 
   1965         # Skip if somebody tries to archive the archive...
   1966         if self.name is not None and os.path.abspath(name) == self.name:
   1967             self._dbg(2, "tarfile: Skipped %r" % name)
   1968             return
   1969 
   1970         self._dbg(1, name)
   1971 
   1972         # Create a TarInfo object from the file.
   1973         tarinfo = self.gettarinfo(name, arcname)
   1974 
   1975         if tarinfo is None:
   1976             self._dbg(1, "tarfile: Unsupported type %r" % name)
   1977             return
   1978 
   1979         # Change or exclude the TarInfo object.
   1980         if filter is not None:
   1981             tarinfo = filter(tarinfo)
   1982             if tarinfo is None:
   1983                 self._dbg(2, "tarfile: Excluded %r" % name)
   1984                 return
   1985 
   1986         # Append the tar header and data to the archive.
   1987         if tarinfo.isreg():
   1988             f = bltn_open(name, "rb")
   1989             self.addfile(tarinfo, f)
   1990             f.close()
   1991 
   1992         elif tarinfo.isdir():
   1993             self.addfile(tarinfo)
   1994             if recursive:
   1995                 for f in os.listdir(name):
   1996                     self.add(os.path.join(name, f), os.path.join(arcname, f),
   1997                             recursive, exclude, filter)
   1998 
   1999         else:
   2000             self.addfile(tarinfo)
   2001 
   2002     def addfile(self, tarinfo, fileobj=None):
   2003         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
   2004            given, tarinfo.size bytes are read from it and added to the archive.
   2005            You can create TarInfo objects using gettarinfo().
   2006            On Windows platforms, `fileobj' should always be opened with mode
   2007            'rb' to avoid irritation about the file size.
   2008         """
   2009         self._check("aw")
   2010 
   2011         tarinfo = copy.copy(tarinfo)
   2012 
   2013         buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
   2014         self.fileobj.write(buf)
   2015         self.offset += len(buf)
   2016 
   2017         # If there's data to follow, append it.
   2018         if fileobj is not None:
   2019             copyfileobj(fileobj, self.fileobj, tarinfo.size)
   2020             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
   2021             if remainder > 0:
   2022                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
   2023                 blocks += 1
   2024             self.offset += blocks * BLOCKSIZE
   2025 
   2026         self.members.append(tarinfo)
   2027 
   2028     def extractall(self, path=".", members=None):
   2029         """Extract all members from the archive to the current working
   2030            directory and set owner, modification time and permissions on
   2031            directories afterwards. `path' specifies a different directory
   2032            to extract to. `members' is optional and must be a subset of the
   2033            list returned by getmembers().
   2034         """
   2035         directories = []
   2036 
   2037         if members is None:
   2038             members = self
   2039 
   2040         for tarinfo in members:
   2041             if tarinfo.isdir():
   2042                 # Extract directories with a safe mode.
   2043                 directories.append(tarinfo)
   2044                 tarinfo = copy.copy(tarinfo)
   2045                 tarinfo.mode = 0700
   2046             self.extract(tarinfo, path)
   2047 
   2048         # Reverse sort directories.
   2049         directories.sort(key=operator.attrgetter('name'))
   2050         directories.reverse()
   2051 
   2052         # Set correct owner, mtime and filemode on directories.
   2053         for tarinfo in directories:
   2054             dirpath = os.path.join(path, tarinfo.name)
   2055             try:
   2056                 self.chown(tarinfo, dirpath)
   2057                 self.utime(tarinfo, dirpath)
   2058                 self.chmod(tarinfo, dirpath)
   2059             except ExtractError, e:
   2060                 if self.errorlevel > 1:
   2061                     raise
   2062                 else:
   2063                     self._dbg(1, "tarfile: %s" % e)
   2064 
   2065     def extract(self, member, path=""):
   2066         """Extract a member from the archive to the current working directory,
   2067            using its full name. Its file information is extracted as accurately
   2068            as possible. `member' may be a filename or a TarInfo object. You can
   2069            specify a different directory using `path'.
   2070         """
   2071         self._check("r")
   2072 
   2073         if isinstance(member, basestring):
   2074             tarinfo = self.getmember(member)
   2075         else:
   2076             tarinfo = member
   2077 
   2078         # Prepare the link target for makelink().
   2079         if tarinfo.islnk():
   2080             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
   2081 
   2082         try:
   2083             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
   2084         except EnvironmentError, e:
   2085             if self.errorlevel > 0:
   2086                 raise
   2087             else:
   2088                 if e.filename is None:
   2089                     self._dbg(1, "tarfile: %s" % e.strerror)
   2090                 else:
   2091                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
   2092         except ExtractError, e:
   2093             if self.errorlevel > 1:
   2094                 raise
   2095             else:
   2096                 self._dbg(1, "tarfile: %s" % e)
   2097 
   2098     def extractfile(self, member):
   2099         """Extract a member from the archive as a file object. `member' may be
   2100            a filename or a TarInfo object. If `member' is a regular file, a
   2101            file-like object is returned. If `member' is a link, a file-like
   2102            object is constructed from the link's target. If `member' is none of
   2103            the above, None is returned.
   2104            The file-like object is read-only and provides the following
   2105            methods: read(), readline(), readlines(), seek() and tell()
   2106         """
   2107         self._check("r")
   2108 
   2109         if isinstance(member, basestring):
   2110             tarinfo = self.getmember(member)
   2111         else:
   2112             tarinfo = member
   2113 
   2114         if tarinfo.isreg():
   2115             return self.fileobject(self, tarinfo)
   2116 
   2117         elif tarinfo.type not in SUPPORTED_TYPES:
   2118             # If a member's type is unknown, it is treated as a

   2119             # regular file.

   2120             return self.fileobject(self, tarinfo)
   2121 
   2122         elif tarinfo.islnk() or tarinfo.issym():
   2123             if isinstance(self.fileobj, _Stream):
   2124                 # A small but ugly workaround for the case that someone tries

   2125                 # to extract a (sym)link as a file-object from a non-seekable

   2126                 # stream of tar blocks.

   2127                 raise StreamError("cannot extract (sym)link as file object")
   2128             else:
   2129                 # A (sym)link's file object is its target's file object.

   2130                 return self.extractfile(self._find_link_target(tarinfo))
   2131         else:
   2132             # If there's no data associated with the member (directory, chrdev,

   2133             # blkdev, etc.), return None instead of a file object.

   2134             return None
   2135 
   2136     def _extract_member(self, tarinfo, targetpath):
   2137         """Extract the TarInfo object tarinfo to a physical
   2138            file called targetpath.
   2139         """
   2140         # Fetch the TarInfo object for the given name

   2141         # and build the destination pathname, replacing

   2142         # forward slashes to platform specific separators.

   2143         targetpath = targetpath.rstrip("/")
   2144         targetpath = targetpath.replace("/", os.sep)
   2145 
   2146         # Create all upper directories.

   2147         upperdirs = os.path.dirname(targetpath)
   2148         if upperdirs and not os.path.exists(upperdirs):
   2149             # Create directories that are not part of the archive with

   2150             # default permissions.

   2151             os.makedirs(upperdirs)
   2152 
   2153         if tarinfo.islnk() or tarinfo.issym():
   2154             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
   2155         else:
   2156             self._dbg(1, tarinfo.name)
   2157 
   2158         if tarinfo.isreg():
   2159             self.makefile(tarinfo, targetpath)
   2160         elif tarinfo.isdir():
   2161             self.makedir(tarinfo, targetpath)
   2162         elif tarinfo.isfifo():
   2163             self.makefifo(tarinfo, targetpath)
   2164         elif tarinfo.ischr() or tarinfo.isblk():
   2165             self.makedev(tarinfo, targetpath)
   2166         elif tarinfo.islnk() or tarinfo.issym():
   2167             self.makelink(tarinfo, targetpath)
   2168         elif tarinfo.type not in SUPPORTED_TYPES:
   2169             self.makeunknown(tarinfo, targetpath)
   2170         else:
   2171             self.makefile(tarinfo, targetpath)
   2172 
   2173         self.chown(tarinfo, targetpath)
   2174         if not tarinfo.issym():
   2175             self.chmod(tarinfo, targetpath)
   2176             self.utime(tarinfo, targetpath)
   2177 
   2178     #--------------------------------------------------------------------------

   2179     # Below are the different file methods. They are called via

   2180     # _extract_member() when extract() is called. They can be replaced in a

   2181     # subclass to implement other functionality.

   2182 
   2183     def makedir(self, tarinfo, targetpath):
   2184         """Make a directory called targetpath.
   2185         """
   2186         try:
   2187             # Use a safe mode for the directory, the real mode is set

   2188             # later in _extract_member().

   2189             os.mkdir(targetpath, 0700)
   2190         except EnvironmentError, e:
   2191             if e.errno != errno.EEXIST:
   2192                 raise
   2193 
   2194     def makefile(self, tarinfo, targetpath):
   2195         """Make a file called targetpath.
   2196         """
   2197         source = self.extractfile(tarinfo)
   2198         target = bltn_open(targetpath, "wb")
   2199         copyfileobj(source, target)
   2200         source.close()
   2201         target.close()
   2202 
   2203     def makeunknown(self, tarinfo, targetpath):
   2204         """Make a file from a TarInfo object with an unknown type
   2205            at targetpath.
   2206         """
   2207         self.makefile(tarinfo, targetpath)
   2208         self._dbg(1, "tarfile: Unknown file type %r, " \
   2209                      "extracted as regular file." % tarinfo.type)
   2210 
   2211     def makefifo(self, tarinfo, targetpath):
   2212         """Make a fifo called targetpath.
   2213         """
   2214         if hasattr(os, "mkfifo"):
   2215             os.mkfifo(targetpath)
   2216         else:
   2217             raise ExtractError("fifo not supported by system")
   2218 
   2219     def makedev(self, tarinfo, targetpath):
   2220         """Make a character or block device called targetpath.
   2221         """
   2222         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
   2223             raise ExtractError("special devices not supported by system")
   2224 
   2225         mode = tarinfo.mode
   2226         if tarinfo.isblk():
   2227             mode |= stat.S_IFBLK
   2228         else:
   2229             mode |= stat.S_IFCHR
   2230 
   2231         os.mknod(targetpath, mode,
   2232                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
   2233 
   2234     def makelink(self, tarinfo, targetpath):
   2235         """Make a (symbolic) link called targetpath. If it cannot be created
   2236           (platform limitation), we try to make a copy of the referenced file
   2237           instead of a link.
   2238         """
   2239         if hasattr(os, "symlink") and hasattr(os, "link"):
   2240             # For systems that support symbolic and hard links.

   2241             if tarinfo.issym():
   2242                 if os.path.lexists(targetpath):
   2243                     os.unlink(targetpath)
   2244                 os.symlink(tarinfo.linkname, targetpath)
   2245             else:
   2246                 # See extract().

   2247                 if os.path.exists(tarinfo._link_target):
   2248                     if os.path.lexists(targetpath):
   2249                         os.unlink(targetpath)
   2250                     os.link(tarinfo._link_target, targetpath)
   2251                 else:
   2252                     self._extract_member(self._find_link_target(tarinfo), targetpath)
   2253         else:
   2254             try:
   2255                 self._extract_member(self._find_link_target(tarinfo), targetpath)
   2256             except KeyError:
   2257                 raise ExtractError("unable to resolve link inside archive")
   2258 
   2259     def chown(self, tarinfo, targetpath):
   2260         """Set owner of targetpath according to tarinfo.
   2261         """
   2262         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
   2263             # We have to be root to do so.

   2264             try:
   2265                 g = grp.getgrnam(tarinfo.gname)[2]
   2266             except KeyError:
   2267                 try:
   2268                     g = grp.getgrgid(tarinfo.gid)[2]
   2269                 except KeyError:
   2270                     g = os.getgid()
   2271             try:
   2272                 u = pwd.getpwnam(tarinfo.uname)[2]
   2273             except KeyError:
   2274                 try:
   2275                     u = pwd.getpwuid(tarinfo.uid)[2]
   2276                 except KeyError:
   2277                     u = os.getuid()
   2278             try:
   2279                 if tarinfo.issym() and hasattr(os, "lchown"):
   2280                     os.lchown(targetpath, u, g)
   2281                 else:
   2282                     if sys.platform != "os2emx":
   2283                         os.chown(targetpath, u, g)
   2284             except EnvironmentError, e:
   2285                 raise ExtractError("could not change owner")
   2286 
   2287     def chmod(self, tarinfo, targetpath):
   2288         """Set file permissions of targetpath according to tarinfo.
   2289         """
   2290         if hasattr(os, 'chmod'):
   2291             try:
   2292                 os.chmod(targetpath, tarinfo.mode)
   2293             except EnvironmentError, e:
   2294                 raise ExtractError("could not change mode")
   2295 
   2296     def utime(self, tarinfo, targetpath):
   2297         """Set modification time of targetpath according to tarinfo.
   2298         """
   2299         if not hasattr(os, 'utime'):
   2300             return
   2301         try:
   2302             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
   2303         except EnvironmentError, e:
   2304             raise ExtractError("could not change modification time")
   2305 
   2306     #--------------------------------------------------------------------------

   2307     def next(self):
   2308         """Return the next member of the archive as a TarInfo object, when
   2309            TarFile is opened for reading. Return None if there is no more
   2310            available.
   2311         """
   2312         self._check("ra")
   2313         if self.firstmember is not None:
   2314             m = self.firstmember
   2315             self.firstmember = None
   2316             return m
   2317 
   2318         # Read the next block.

   2319         self.fileobj.seek(self.offset)
   2320         tarinfo = None
   2321         while True:
   2322             try:
   2323                 tarinfo = self.tarinfo.fromtarfile(self)
   2324             except EOFHeaderError, e:
   2325                 if self.ignore_zeros:
   2326                     self._dbg(2, "0x%X: %s" % (self.offset, e))
   2327                     self.offset += BLOCKSIZE
   2328                     continue
   2329             except InvalidHeaderError, e:
   2330                 if self.ignore_zeros:
   2331                     self._dbg(2, "0x%X: %s" % (self.offset, e))
   2332                     self.offset += BLOCKSIZE
   2333                     continue
   2334                 elif self.offset == 0:
   2335                     raise ReadError(str(e))
   2336             except EmptyHeaderError:
   2337                 if self.offset == 0:
   2338                     raise ReadError("empty file")
   2339             except TruncatedHeaderError, e:
   2340                 if self.offset == 0:
   2341                     raise ReadError(str(e))
   2342             except SubsequentHeaderError, e:
   2343                 raise ReadError(str(e))
   2344             break
   2345 
   2346         if tarinfo is not None:
   2347             self.members.append(tarinfo)
   2348         else:
   2349             self._loaded = True
   2350 
   2351         return tarinfo
   2352 
   2353     #--------------------------------------------------------------------------

   2354     # Little helper methods:

   2355 
   2356     def _getmember(self, name, tarinfo=None, normalize=False):
   2357         """Find an archive member by name from bottom to top.
   2358            If tarinfo is given, it is used as the starting point.
   2359         """
   2360         # Ensure that all members have been loaded.

   2361         members = self.getmembers()
   2362 
   2363         # Limit the member search list up to tarinfo.

   2364         if tarinfo is not None:
   2365             members = members[:members.index(tarinfo)]
   2366 
   2367         if normalize:
   2368             name = os.path.normpath(name)
   2369 
   2370         for member in reversed(members):
   2371             if normalize:
   2372                 member_name = os.path.normpath(member.name)
   2373             else:
   2374                 member_name = member.name
   2375 
   2376             if name == member_name:
   2377                 return member
   2378 
   2379     def _load(self):
   2380         """Read through the entire archive file and look for readable
   2381            members.
   2382         """
   2383         while True:
   2384             tarinfo = self.next()
   2385             if tarinfo is None:
   2386                 break
   2387         self._loaded = True
   2388 
   2389     def _check(self, mode=None):
   2390         """Check if TarFile is still open, and if the operation's mode
   2391            corresponds to TarFile's mode.
   2392         """
   2393         if self.closed:
   2394             raise IOError("%s is closed" % self.__class__.__name__)
   2395         if mode is not None and self.mode not in mode:
   2396             raise IOError("bad operation for mode %r" % self.mode)
   2397 
   2398     def _find_link_target(self, tarinfo):
   2399         """Find the target member of a symlink or hardlink member in the
   2400            archive.
   2401         """
   2402         if tarinfo.issym():
   2403             # Always search the entire archive.

   2404             linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
   2405             limit = None
   2406         else:
   2407             # Search the archive before the link, because a hard link is

   2408             # just a reference to an already archived file.

   2409             linkname = tarinfo.linkname
   2410             limit = tarinfo
   2411 
   2412         member = self._getmember(linkname, tarinfo=limit, normalize=True)
   2413         if member is None:
   2414             raise KeyError("linkname %r not found" % linkname)
   2415         return member
   2416 
   2417     def __iter__(self):
   2418         """Provide an iterator object.
   2419         """
   2420         if self._loaded:
   2421             return iter(self.members)
   2422         else:
   2423             return TarIter(self)
   2424 
   2425     def _dbg(self, level, msg):
   2426         """Write debugging output to sys.stderr.
   2427         """
   2428         if level <= self.debug:
   2429             print >> sys.stderr, msg
   2430 
   2431     def __enter__(self):
   2432         self._check()
   2433         return self
   2434 
   2435     def __exit__(self, type, value, traceback):
   2436         if type is None:
   2437             self.close()
   2438         else:
   2439             # An exception occurred. We must not call close() because

   2440             # it would try to write end-of-archive blocks and padding.

   2441             if not self._extfileobj:
   2442                 self.fileobj.close()
   2443             self.closed = True
   2444 # class TarFile

   2445 
   2446 class TarIter:
   2447     """Iterator Class.
   2448 
   2449        for tarinfo in TarFile(...):
   2450            suite...
   2451     """
   2452 
   2453     def __init__(self, tarfile):
   2454         """Construct a TarIter object.
   2455         """
   2456         self.tarfile = tarfile
   2457         self.index = 0
   2458     def __iter__(self):
   2459         """Return iterator object.
   2460         """
   2461         return self
   2462     def next(self):
   2463         """Return the next item using TarFile's next() method.
   2464            When all members have been read, set TarFile as _loaded.
   2465         """
   2466         # Fix for SF #1100429: Under rare circumstances it can

   2467         # happen that getmembers() is called during iteration,

   2468         # which will cause TarIter to stop prematurely.

   2469         if not self.tarfile._loaded:
   2470             tarinfo = self.tarfile.next()
   2471             if not tarinfo:
   2472                 self.tarfile._loaded = True
   2473                 raise StopIteration
   2474         else:
   2475             try:
   2476                 tarinfo = self.tarfile.members[self.index]
   2477             except IndexError:
   2478                 raise StopIteration
   2479         self.index += 1
   2480         return tarinfo
   2481 
   2482 # Helper classes for sparse file support

   2483 class _section:
   2484     """Base class for _data and _hole.
   2485     """
   2486     def __init__(self, offset, size):
   2487         self.offset = offset
   2488         self.size = size
   2489     def __contains__(self, offset):
   2490         return self.offset <= offset < self.offset + self.size
   2491 
   2492 class _data(_section):
   2493     """Represent a data section in a sparse file.
   2494     """
   2495     def __init__(self, offset, size, realpos):
   2496         _section.__init__(self, offset, size)
   2497         self.realpos = realpos
   2498 
   2499 class _hole(_section):
   2500     """Represent a hole section in a sparse file.
   2501     """
   2502     pass
   2503 
   2504 class _ringbuffer(list):
   2505     """Ringbuffer class which increases performance
   2506        over a regular list.
   2507     """
   2508     def __init__(self):
   2509         self.idx = 0
   2510     def find(self, offset):
   2511         idx = self.idx
   2512         while True:
   2513             item = self[idx]
   2514             if offset in item:
   2515                 break
   2516             idx += 1
   2517             if idx == len(self):
   2518                 idx = 0
   2519             if idx == self.idx:
   2520                 # End of File

   2521                 return None
   2522         self.idx = idx
   2523         return item
   2524 
   2525 #---------------------------------------------

   2526 # zipfile compatible TarFile class

   2527 #---------------------------------------------

   2528 TAR_PLAIN = 0           # zipfile.ZIP_STORED

   2529 TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED

   2530 class TarFileCompat:
   2531     """TarFile class compatible with standard module zipfile's
   2532        ZipFile class.
   2533     """
   2534     def __init__(self, file, mode="r", compression=TAR_PLAIN):
   2535         from warnings import warnpy3k
   2536         warnpy3k("the TarFileCompat class has been removed in Python 3.0",
   2537                 stacklevel=2)
   2538         if compression == TAR_PLAIN:
   2539             self.tarfile = TarFile.taropen(file, mode)
   2540         elif compression == TAR_GZIPPED:
   2541             self.tarfile = TarFile.gzopen(file, mode)
   2542         else:
   2543             raise ValueError("unknown compression constant")
   2544         if mode[0:1] == "r":
   2545             members = self.tarfile.getmembers()
   2546             for m in members:
   2547                 m.filename = m.name
   2548                 m.file_size = m.size
   2549                 m.date_time = time.gmtime(m.mtime)[:6]
   2550     def namelist(self):
   2551         return map(lambda m: m.name, self.infolist())
   2552     def infolist(self):
   2553         return filter(lambda m: m.type in REGULAR_TYPES,
   2554                       self.tarfile.getmembers())
   2555     def printdir(self):
   2556         self.tarfile.list()
   2557     def testzip(self):
   2558         return
   2559     def getinfo(self, name):
   2560         return self.tarfile.getmember(name)
   2561     def read(self, name):
   2562         return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
   2563     def write(self, filename, arcname=None, compress_type=None):
   2564         self.tarfile.add(filename, arcname)
   2565     def writestr(self, zinfo, bytes):
   2566         try:
   2567             from cStringIO import StringIO
   2568         except ImportError:
   2569             from StringIO import StringIO
   2570         import calendar
   2571         tinfo = TarInfo(zinfo.filename)
   2572         tinfo.size = len(bytes)
   2573         tinfo.mtime = calendar.timegm(zinfo.date_time)
   2574         self.tarfile.addfile(tinfo, StringIO(bytes))
   2575     def close(self):
   2576         self.tarfile.close()
   2577 #class TarFileCompat

   2578 
   2579 #--------------------

   2580 # exported functions

   2581 #--------------------

   2582 def is_tarfile(name):
   2583     """Return True if name points to a tar archive that we
   2584        are able to handle, else return False.
   2585     """
   2586     try:
   2587         t = open(name)
   2588         t.close()
   2589         return True
   2590     except TarError:
   2591         return False
   2592 
# Keep the previous binding of open reachable as bltn_open before the
# module-level name is rebound on the next line.
bltn_open = open
# From here on, open at module level refers to TarFile.open.
open = TarFile.open
   2595