1 #!/usr/bin/env python 2 # -*- coding: iso-8859-1 -*- 3 #------------------------------------------------------------------- 4 # tarfile.py 5 #------------------------------------------------------------------- 6 # Copyright (C) 2002 Lars Gustbel <lars (at] gustaebel.de> 7 # All rights reserved. 8 # 9 # Permission is hereby granted, free of charge, to any person 10 # obtaining a copy of this software and associated documentation 11 # files (the "Software"), to deal in the Software without 12 # restriction, including without limitation the rights to use, 13 # copy, modify, merge, publish, distribute, sublicense, and/or sell 14 # copies of the Software, and to permit persons to whom the 15 # Software is furnished to do so, subject to the following 16 # conditions: 17 # 18 # The above copyright notice and this permission notice shall be 19 # included in all copies or substantial portions of the Software. 20 # 21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 28 # OTHER DEALINGS IN THE SOFTWARE. 29 # 30 """Read from and write to tar format archives. 31 """ 32 33 __version__ = "$Revision: 85213 $" 34 # $Source$ 35 36 version = "0.9.0" 37 __author__ = "Lars Gustbel (lars (at] gustaebel.de)" 38 __date__ = "$Date$" 39 __cvsid__ = "$Id$" 40 __credits__ = "Gustavo Niemeyer, Niels Gustbel, Richard Townsend." 41 42 #--------- 43 # Imports 44 #--------- 45 import sys 46 import os 47 import shutil 48 import stat 49 import errno 50 import time 51 import struct 52 import copy 53 import re 54 import operator 55 56 try: 57 import grp, pwd 58 except ImportError: 59 grp = pwd = None 60 61 # from tarfile import * 62 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] 63 64 #--------------------------------------------------------- 65 # tar constants 66 #--------------------------------------------------------- 67 NUL = "\0" # the null character 68 BLOCKSIZE = 512 # length of processing blocks 69 RECORDSIZE = BLOCKSIZE * 20 # length of records 70 GNU_MAGIC = "ustar \0" # magic gnu tar string 71 POSIX_MAGIC = "ustar\x0000" # magic posix tar string 72 73 LENGTH_NAME = 100 # maximum length of a filename 74 LENGTH_LINK = 100 # maximum length of a linkname 75 LENGTH_PREFIX = 155 # maximum length of the prefix field 76 77 REGTYPE = "0" # regular file 78 AREGTYPE = "\0" # regular file 79 LNKTYPE = "1" # link (inside tarfile) 80 SYMTYPE = "2" # symbolic link 81 CHRTYPE = "3" # character special device 82 BLKTYPE = "4" # block special device 83 DIRTYPE = "5" # directory 84 FIFOTYPE = "6" # fifo special device 85 CONTTYPE = "7" # contiguous file 86 87 GNUTYPE_LONGNAME = "L" # GNU tar longname 88 GNUTYPE_LONGLINK = "K" # GNU tar longlink 89 GNUTYPE_SPARSE = "S" # GNU tar sparse file 90 91 XHDTYPE = "x" # POSIX.1-2001 extended header 92 XGLTYPE = "g" # POSIX.1-2001 global header 93 SOLARIS_XHDTYPE = "X" # Solaris extended header 94 95 USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format 96 GNU_FORMAT = 1 # GNU tar format 97 PAX_FORMAT = 2 # POSIX.1-2001 (pax) format 98 DEFAULT_FORMAT = GNU_FORMAT 99 100 #--------------------------------------------------------- 101 # tarfile constants 102 #--------------------------------------------------------- 103 # File types that tarfile supports: 104 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, 105 SYMTYPE, DIRTYPE, FIFOTYPE, 106 CONTTYPE, CHRTYPE, BLKTYPE, 107 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 108 GNUTYPE_SPARSE) 109 110 # File types that will be treated as a regular file. 111 REGULAR_TYPES = (REGTYPE, AREGTYPE, 112 CONTTYPE, GNUTYPE_SPARSE) 113 114 # File types that are part of the GNU tar format. 115 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 116 GNUTYPE_SPARSE) 117 118 # Fields from a pax header that override a TarInfo attribute. 119 PAX_FIELDS = ("path", "linkpath", "size", "mtime", 120 "uid", "gid", "uname", "gname") 121 122 # Fields in a pax header that are numbers, all other fields 123 # are treated as strings. 124 PAX_NUMBER_FIELDS = { 125 "atime": float, 126 "ctime": float, 127 "mtime": float, 128 "uid": int, 129 "gid": int, 130 "size": int 131 } 132 133 #--------------------------------------------------------- 134 # Bits used in the mode field, values in octal. 135 #--------------------------------------------------------- 136 S_IFLNK = 0120000 # symbolic link 137 S_IFREG = 0100000 # regular file 138 S_IFBLK = 0060000 # block device 139 S_IFDIR = 0040000 # directory 140 S_IFCHR = 0020000 # character device 141 S_IFIFO = 0010000 # fifo 142 143 TSUID = 04000 # set UID on execution 144 TSGID = 02000 # set GID on execution 145 TSVTX = 01000 # reserved 146 147 TUREAD = 0400 # read by owner 148 TUWRITE = 0200 # write by owner 149 TUEXEC = 0100 # execute/search by owner 150 TGREAD = 0040 # read by group 151 TGWRITE = 0020 # write by group 152 TGEXEC = 0010 # execute/search by group 153 TOREAD = 0004 # read by other 154 TOWRITE = 0002 # write by other 155 TOEXEC = 0001 # execute/search by other 156 157 #--------------------------------------------------------- 158 # initialization 159 #--------------------------------------------------------- 160 ENCODING = sys.getfilesystemencoding() 161 if ENCODING is None: 162 ENCODING = sys.getdefaultencoding() 163 164 #--------------------------------------------------------- 165 # Some useful functions 166 #--------------------------------------------------------- 167 168 def stn(s, length): 169 """Convert a python string to a null-terminated string buffer. 170 """ 171 return s[:length] + (length - len(s)) * NUL 172 173 def nts(s): 174 """Convert a null-terminated string field to a python string. 175 """ 176 # Use the string up to the first null char. 177 p = s.find("\0") 178 if p == -1: 179 return s 180 return s[:p] 181 182 def nti(s): 183 """Convert a number field to a python number. 184 """ 185 # There are two possible encodings for a number field, see 186 # itn() below. 187 if s[0] != chr(0200): 188 try: 189 n = int(nts(s) or "0", 8) 190 except ValueError: 191 raise InvalidHeaderError("invalid header") 192 else: 193 n = 0L 194 for i in xrange(len(s) - 1): 195 n <<= 8 196 n += ord(s[i + 1]) 197 return n 198 199 def itn(n, digits=8, format=DEFAULT_FORMAT): 200 """Convert a python number to a number field. 201 """ 202 # POSIX 1003.1-1988 requires numbers to be encoded as a string of 203 # octal digits followed by a null-byte, this allows values up to 204 # (8**(digits-1))-1. GNU tar allows storing numbers greater than 205 # that if necessary. A leading 0200 byte indicates this particular 206 # encoding, the following digits-1 bytes are a big-endian 207 # representation. This allows values up to (256**(digits-1))-1. 208 if 0 <= n < 8 ** (digits - 1): 209 s = "%0*o" % (digits - 1, n) + NUL 210 else: 211 if format != GNU_FORMAT or n >= 256 ** (digits - 1): 212 raise ValueError("overflow in number field") 213 214 if n < 0: 215 # XXX We mimic GNU tar's behaviour with negative numbers, 216 # this could raise OverflowError. 217 n = struct.unpack("L", struct.pack("l", n))[0] 218 219 s = "" 220 for i in xrange(digits - 1): 221 s = chr(n & 0377) + s 222 n >>= 8 223 s = chr(0200) + s 224 return s 225 226 def uts(s, encoding, errors): 227 """Convert a unicode object to a string. 228 """ 229 if errors == "utf-8": 230 # An extra error handler similar to the -o invalid=UTF-8 option 231 # in POSIX.1-2001. Replace untranslatable characters with their 232 # UTF-8 representation. 233 try: 234 return s.encode(encoding, "strict") 235 except UnicodeEncodeError: 236 x = [] 237 for c in s: 238 try: 239 x.append(c.encode(encoding, "strict")) 240 except UnicodeEncodeError: 241 x.append(c.encode("utf8")) 242 return "".join(x) 243 else: 244 return s.encode(encoding, errors) 245 246 def calc_chksums(buf): 247 """Calculate the checksum for a member's header by summing up all 248 characters except for the chksum field which is treated as if 249 it was filled with spaces. According to the GNU tar sources, 250 some tars (Sun and NeXT) calculate chksum with signed char, 251 which will be different if there are chars in the buffer with 252 the high bit set. So we calculate two checksums, unsigned and 253 signed. 254 """ 255 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) 256 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) 257 return unsigned_chksum, signed_chksum 258 259 def copyfileobj(src, dst, length=None): 260 """Copy length bytes from fileobj src to fileobj dst. 261 If length is None, copy the entire content. 262 """ 263 if length == 0: 264 return 265 if length is None: 266 shutil.copyfileobj(src, dst) 267 return 268 269 BUFSIZE = 16 * 1024 270 blocks, remainder = divmod(length, BUFSIZE) 271 for b in xrange(blocks): 272 buf = src.read(BUFSIZE) 273 if len(buf) < BUFSIZE: 274 raise IOError("end of file reached") 275 dst.write(buf) 276 277 if remainder != 0: 278 buf = src.read(remainder) 279 if len(buf) < remainder: 280 raise IOError("end of file reached") 281 dst.write(buf) 282 return 283 284 filemode_table = ( 285 ((S_IFLNK, "l"), 286 (S_IFREG, "-"), 287 (S_IFBLK, "b"), 288 (S_IFDIR, "d"), 289 (S_IFCHR, "c"), 290 (S_IFIFO, "p")), 291 292 ((TUREAD, "r"),), 293 ((TUWRITE, "w"),), 294 ((TUEXEC|TSUID, "s"), 295 (TSUID, "S"), 296 (TUEXEC, "x")), 297 298 ((TGREAD, "r"),), 299 ((TGWRITE, "w"),), 300 ((TGEXEC|TSGID, "s"), 301 (TSGID, "S"), 302 (TGEXEC, "x")), 303 304 ((TOREAD, "r"),), 305 ((TOWRITE, "w"),), 306 ((TOEXEC|TSVTX, "t"), 307 (TSVTX, "T"), 308 (TOEXEC, "x")) 309 ) 310 311 def filemode(mode): 312 """Convert a file's mode to a string of the form 313 -rwxrwxrwx. 314 Used by TarFile.list() 315 """ 316 perm = [] 317 for table in filemode_table: 318 for bit, char in table: 319 if mode & bit == bit: 320 perm.append(char) 321 break 322 else: 323 perm.append("-") 324 return "".join(perm) 325 326 class TarError(Exception): 327 """Base exception.""" 328 pass 329 class ExtractError(TarError): 330 """General exception for extract errors.""" 331 pass 332 class ReadError(TarError): 333 """Exception for unreadble tar archives.""" 334 pass 335 class CompressionError(TarError): 336 """Exception for unavailable compression methods.""" 337 pass 338 class StreamError(TarError): 339 """Exception for unsupported operations on stream-like TarFiles.""" 340 pass 341 class HeaderError(TarError): 342 """Base exception for header errors.""" 343 pass 344 class EmptyHeaderError(HeaderError): 345 """Exception for empty headers.""" 346 pass 347 class TruncatedHeaderError(HeaderError): 348 """Exception for truncated headers.""" 349 pass 350 class EOFHeaderError(HeaderError): 351 """Exception for end of file headers.""" 352 pass 353 class InvalidHeaderError(HeaderError): 354 """Exception for invalid headers.""" 355 pass 356 class SubsequentHeaderError(HeaderError): 357 """Exception for missing and invalid extended headers.""" 358 pass 359 360 #--------------------------- 361 # internal stream interface 362 #--------------------------- 363 class _LowLevelFile: 364 """Low-level file object. Supports reading and writing. 365 It is used instead of a regular file object for streaming 366 access. 367 """ 368 369 def __init__(self, name, mode): 370 mode = { 371 "r": os.O_RDONLY, 372 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 373 }[mode] 374 if hasattr(os, "O_BINARY"): 375 mode |= os.O_BINARY 376 self.fd = os.open(name, mode, 0666) 377 378 def close(self): 379 os.close(self.fd) 380 381 def read(self, size): 382 return os.read(self.fd, size) 383 384 def write(self, s): 385 os.write(self.fd, s) 386 387 class _Stream: 388 """Class that serves as an adapter between TarFile and 389 a stream-like object. The stream-like object only 390 needs to have a read() or write() method and is accessed 391 blockwise. Use of gzip or bzip2 compression is possible. 392 A stream-like object could be for example: sys.stdin, 393 sys.stdout, a socket, a tape device etc. 394 395 _Stream is intended to be used only internally. 396 """ 397 398 def __init__(self, name, mode, comptype, fileobj, bufsize): 399 """Construct a _Stream object. 400 """ 401 self._extfileobj = True 402 if fileobj is None: 403 fileobj = _LowLevelFile(name, mode) 404 self._extfileobj = False 405 406 if comptype == '*': 407 # Enable transparent compression detection for the 408 # stream interface 409 fileobj = _StreamProxy(fileobj) 410 comptype = fileobj.getcomptype() 411 412 self.name = name or "" 413 self.mode = mode 414 self.comptype = comptype 415 self.fileobj = fileobj 416 self.bufsize = bufsize 417 self.buf = "" 418 self.pos = 0L 419 self.closed = False 420 421 if comptype == "gz": 422 try: 423 import zlib 424 except ImportError: 425 raise CompressionError("zlib module is not available") 426 self.zlib = zlib 427 self.crc = zlib.crc32("") & 0xffffffffL 428 if mode == "r": 429 self._init_read_gz() 430 else: 431 self._init_write_gz() 432 433 if comptype == "bz2": 434 try: 435 import bz2 436 except ImportError: 437 raise CompressionError("bz2 module is not available") 438 if mode == "r": 439 self.dbuf = "" 440 self.cmp = bz2.BZ2Decompressor() 441 else: 442 self.cmp = bz2.BZ2Compressor() 443 444 def __del__(self): 445 if hasattr(self, "closed") and not self.closed: 446 self.close() 447 448 def _init_write_gz(self): 449 """Initialize for writing with gzip compression. 450 """ 451 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, 452 -self.zlib.MAX_WBITS, 453 self.zlib.DEF_MEM_LEVEL, 454 0) 455 timestamp = struct.pack("<L", long(time.time())) 456 self.__write("\037\213\010\010%s\002\377" % timestamp) 457 if type(self.name) is unicode: 458 self.name = self.name.encode("iso-8859-1", "replace") 459 if self.name.endswith(".gz"): 460 self.name = self.name[:-3] 461 self.__write(self.name + NUL) 462 463 def write(self, s): 464 """Write string s to the stream. 465 """ 466 if self.comptype == "gz": 467 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL 468 self.pos += len(s) 469 if self.comptype != "tar": 470 s = self.cmp.compress(s) 471 self.__write(s) 472 473 def __write(self, s): 474 """Write string s to the stream if a whole new block 475 is ready to be written. 476 """ 477 self.buf += s 478 while len(self.buf) > self.bufsize: 479 self.fileobj.write(self.buf[:self.bufsize]) 480 self.buf = self.buf[self.bufsize:] 481 482 def close(self): 483 """Close the _Stream object. No operation should be 484 done on it afterwards. 485 """ 486 if self.closed: 487 return 488 489 if self.mode == "w" and self.comptype != "tar": 490 self.buf += self.cmp.flush() 491 492 if self.mode == "w" and self.buf: 493 self.fileobj.write(self.buf) 494 self.buf = "" 495 if self.comptype == "gz": 496 # The native zlib crc is an unsigned 32-bit integer, but 497 # the Python wrapper implicitly casts that to a signed C 498 # long. So, on a 32-bit box self.crc may "look negative", 499 # while the same crc on a 64-bit box may "look positive". 500 # To avoid irksome warnings from the `struct` module, force 501 # it to look positive on all boxes. 502 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL)) 503 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) 504 505 if not self._extfileobj: 506 self.fileobj.close() 507 508 self.closed = True 509 510 def _init_read_gz(self): 511 """Initialize for reading a gzip compressed fileobj. 512 """ 513 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) 514 self.dbuf = "" 515 516 # taken from gzip.GzipFile with some alterations 517 if self.__read(2) != "\037\213": 518 raise ReadError("not a gzip file") 519 if self.__read(1) != "\010": 520 raise CompressionError("unsupported compression method") 521 522 flag = ord(self.__read(1)) 523 self.__read(6) 524 525 if flag & 4: 526 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) 527 self.read(xlen) 528 if flag & 8: 529 while True: 530 s = self.__read(1) 531 if not s or s == NUL: 532 break 533 if flag & 16: 534 while True: 535 s = self.__read(1) 536 if not s or s == NUL: 537 break 538 if flag & 2: 539 self.__read(2) 540 541 def tell(self): 542 """Return the stream's file pointer position. 543 """ 544 return self.pos 545 546 def seek(self, pos=0): 547 """Set the stream's file pointer to pos. Negative seeking 548 is forbidden. 549 """ 550 if pos - self.pos >= 0: 551 blocks, remainder = divmod(pos - self.pos, self.bufsize) 552 for i in xrange(blocks): 553 self.read(self.bufsize) 554 self.read(remainder) 555 else: 556 raise StreamError("seeking backwards is not allowed") 557 return self.pos 558 559 def read(self, size=None): 560 """Return the next size number of bytes from the stream. 561 If size is not defined, return all bytes of the stream 562 up to EOF. 563 """ 564 if size is None: 565 t = [] 566 while True: 567 buf = self._read(self.bufsize) 568 if not buf: 569 break 570 t.append(buf) 571 buf = "".join(t) 572 else: 573 buf = self._read(size) 574 self.pos += len(buf) 575 return buf 576 577 def _read(self, size): 578 """Return size bytes from the stream. 579 """ 580 if self.comptype == "tar": 581 return self.__read(size) 582 583 c = len(self.dbuf) 584 t = [self.dbuf] 585 while c < size: 586 buf = self.__read(self.bufsize) 587 if not buf: 588 break 589 try: 590 buf = self.cmp.decompress(buf) 591 except IOError: 592 raise ReadError("invalid compressed data") 593 t.append(buf) 594 c += len(buf) 595 t = "".join(t) 596 self.dbuf = t[size:] 597 return t[:size] 598 599 def __read(self, size): 600 """Return size bytes from stream. If internal buffer is empty, 601 read another block from the stream. 602 """ 603 c = len(self.buf) 604 t = [self.buf] 605 while c < size: 606 buf = self.fileobj.read(self.bufsize) 607 if not buf: 608 break 609 t.append(buf) 610 c += len(buf) 611 t = "".join(t) 612 self.buf = t[size:] 613 return t[:size] 614 # class _Stream 615 616 class _StreamProxy(object): 617 """Small proxy class that enables transparent compression 618 detection for the Stream interface (mode 'r|*'). 619 """ 620 621 def __init__(self, fileobj): 622 self.fileobj = fileobj 623 self.buf = self.fileobj.read(BLOCKSIZE) 624 625 def read(self, size): 626 self.read = self.fileobj.read 627 return self.buf 628 629 def getcomptype(self): 630 if self.buf.startswith("\037\213\010"): 631 return "gz" 632 if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY": 633 return "bz2" 634 return "tar" 635 636 def close(self): 637 self.fileobj.close() 638 # class StreamProxy 639 640 class _BZ2Proxy(object): 641 """Small proxy class that enables external file object 642 support for "r:bz2" and "w:bz2" modes. This is actually 643 a workaround for a limitation in bz2 module's BZ2File 644 class which (unlike gzip.GzipFile) has no support for 645 a file object argument. 646 """ 647 648 blocksize = 16 * 1024 649 650 def __init__(self, fileobj, mode): 651 self.fileobj = fileobj 652 self.mode = mode 653 self.name = getattr(self.fileobj, "name", None) 654 self.init() 655 656 def init(self): 657 import bz2 658 self.pos = 0 659 if self.mode == "r": 660 self.bz2obj = bz2.BZ2Decompressor() 661 self.fileobj.seek(0) 662 self.buf = "" 663 else: 664 self.bz2obj = bz2.BZ2Compressor() 665 666 def read(self, size): 667 b = [self.buf] 668 x = len(self.buf) 669 while x < size: 670 raw = self.fileobj.read(self.blocksize) 671 if not raw: 672 break 673 data = self.bz2obj.decompress(raw) 674 b.append(data) 675 x += len(data) 676 self.buf = "".join(b) 677 678 buf = self.buf[:size] 679 self.buf = self.buf[size:] 680 self.pos += len(buf) 681 return buf 682 683 def seek(self, pos): 684 if pos < self.pos: 685 self.init() 686 self.read(pos - self.pos) 687 688 def tell(self): 689 return self.pos 690 691 def write(self, data): 692 self.pos += len(data) 693 raw = self.bz2obj.compress(data) 694 self.fileobj.write(raw) 695 696 def close(self): 697 if self.mode == "w": 698 raw = self.bz2obj.flush() 699 self.fileobj.write(raw) 700 # class _BZ2Proxy 701 702 #------------------------ 703 # Extraction file object 704 #------------------------ 705 class _FileInFile(object): 706 """A thin wrapper around an existing file object that 707 provides a part of its data as an individual file 708 object. 709 """ 710 711 def __init__(self, fileobj, offset, size, sparse=None): 712 self.fileobj = fileobj 713 self.offset = offset 714 self.size = size 715 self.sparse = sparse 716 self.position = 0 717 718 def tell(self): 719 """Return the current file position. 720 """ 721 return self.position 722 723 def seek(self, position): 724 """Seek to a position in the file. 725 """ 726 self.position = position 727 728 def read(self, size=None): 729 """Read data from the file. 730 """ 731 if size is None: 732 size = self.size - self.position 733 else: 734 size = min(size, self.size - self.position) 735 736 if self.sparse is None: 737 return self.readnormal(size) 738 else: 739 return self.readsparse(size) 740 741 def readnormal(self, size): 742 """Read operation for regular files. 743 """ 744 self.fileobj.seek(self.offset + self.position) 745 self.position += size 746 return self.fileobj.read(size) 747 748 def readsparse(self, size): 749 """Read operation for sparse files. 750 """ 751 data = [] 752 while size > 0: 753 buf = self.readsparsesection(size) 754 if not buf: 755 break 756 size -= len(buf) 757 data.append(buf) 758 return "".join(data) 759 760 def readsparsesection(self, size): 761 """Read a single section of a sparse file. 762 """ 763 section = self.sparse.find(self.position) 764 765 if section is None: 766 return "" 767 768 size = min(size, section.offset + section.size - self.position) 769 770 if isinstance(section, _data): 771 realpos = section.realpos + self.position - section.offset 772 self.fileobj.seek(self.offset + realpos) 773 self.position += size 774 return self.fileobj.read(size) 775 else: 776 self.position += size 777 return NUL * size 778 #class _FileInFile 779 780 781 class ExFileObject(object): 782 """File-like object for reading an archive member. 783 Is returned by TarFile.extractfile(). 784 """ 785 blocksize = 1024 786 787 def __init__(self, tarfile, tarinfo): 788 self.fileobj = _FileInFile(tarfile.fileobj, 789 tarinfo.offset_data, 790 tarinfo.size, 791 getattr(tarinfo, "sparse", None)) 792 self.name = tarinfo.name 793 self.mode = "r" 794 self.closed = False 795 self.size = tarinfo.size 796 797 self.position = 0 798 self.buffer = "" 799 800 def read(self, size=None): 801 """Read at most size bytes from the file. If size is not 802 present or None, read all data until EOF is reached. 803 """ 804 if self.closed: 805 raise ValueError("I/O operation on closed file") 806 807 buf = "" 808 if self.buffer: 809 if size is None: 810 buf = self.buffer 811 self.buffer = "" 812 else: 813 buf = self.buffer[:size] 814 self.buffer = self.buffer[size:] 815 816 if size is None: 817 buf += self.fileobj.read() 818 else: 819 buf += self.fileobj.read(size - len(buf)) 820 821 self.position += len(buf) 822 return buf 823 824 def readline(self, size=-1): 825 """Read one entire line from the file. If size is present 826 and non-negative, return a string with at most that 827 size, which may be an incomplete line. 828 """ 829 if self.closed: 830 raise ValueError("I/O operation on closed file") 831 832 if "\n" in self.buffer: 833 pos = self.buffer.find("\n") + 1 834 else: 835 buffers = [self.buffer] 836 while True: 837 buf = self.fileobj.read(self.blocksize) 838 buffers.append(buf) 839 if not buf or "\n" in buf: 840 self.buffer = "".join(buffers) 841 pos = self.buffer.find("\n") + 1 842 if pos == 0: 843 # no newline found. 844 pos = len(self.buffer) 845 break 846 847 if size != -1: 848 pos = min(size, pos) 849 850 buf = self.buffer[:pos] 851 self.buffer = self.buffer[pos:] 852 self.position += len(buf) 853 return buf 854 855 def readlines(self): 856 """Return a list with all remaining lines. 857 """ 858 result = [] 859 while True: 860 line = self.readline() 861 if not line: break 862 result.append(line) 863 return result 864 865 def tell(self): 866 """Return the current file position. 867 """ 868 if self.closed: 869 raise ValueError("I/O operation on closed file") 870 871 return self.position 872 873 def seek(self, pos, whence=os.SEEK_SET): 874 """Seek to a position in the file. 875 """ 876 if self.closed: 877 raise ValueError("I/O operation on closed file") 878 879 if whence == os.SEEK_SET: 880 self.position = min(max(pos, 0), self.size) 881 elif whence == os.SEEK_CUR: 882 if pos < 0: 883 self.position = max(self.position + pos, 0) 884 else: 885 self.position = min(self.position + pos, self.size) 886 elif whence == os.SEEK_END: 887 self.position = max(min(self.size + pos, self.size), 0) 888 else: 889 raise ValueError("Invalid argument") 890 891 self.buffer = "" 892 self.fileobj.seek(self.position) 893 894 def close(self): 895 """Close the file object. 896 """ 897 self.closed = True 898 899 def __iter__(self): 900 """Get an iterator over the file's lines. 901 """ 902 while True: 903 line = self.readline() 904 if not line: 905 break 906 yield line 907 #class ExFileObject 908 909 #------------------ 910 # Exported Classes 911 #------------------ 912 class TarInfo(object): 913 """Informational class which holds the details about an 914 archive member given by a tar header block. 915 TarInfo objects are returned by TarFile.getmember(), 916 TarFile.getmembers() and TarFile.gettarinfo() and are 917 usually created internally. 918 """ 919 920 def __init__(self, name=""): 921 """Construct a TarInfo object. name is the optional name 922 of the member. 923 """ 924 self.name = name # member name 925 self.mode = 0644 # file permissions 926 self.uid = 0 # user id 927 self.gid = 0 # group id 928 self.size = 0 # file size 929 self.mtime = 0 # modification time 930 self.chksum = 0 # header checksum 931 self.type = REGTYPE # member type 932 self.linkname = "" # link name 933 self.uname = "" # user name 934 self.gname = "" # group name 935 self.devmajor = 0 # device major number 936 self.devminor = 0 # device minor number 937 938 self.offset = 0 # the tar header starts here 939 self.offset_data = 0 # the file's data starts here 940 941 self.pax_headers = {} # pax header information 942 943 # In pax headers the "name" and "linkname" field are called 944 # "path" and "linkpath". 945 def _getpath(self): 946 return self.name 947 def _setpath(self, name): 948 self.name = name 949 path = property(_getpath, _setpath) 950 951 def _getlinkpath(self): 952 return self.linkname 953 def _setlinkpath(self, linkname): 954 self.linkname = linkname 955 linkpath = property(_getlinkpath, _setlinkpath) 956 957 def __repr__(self): 958 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) 959 960 def get_info(self, encoding, errors): 961 """Return the TarInfo's attributes as a dictionary. 962 """ 963 info = { 964 "name": self.name, 965 "mode": self.mode & 07777, 966 "uid": self.uid, 967 "gid": self.gid, 968 "size": self.size, 969 "mtime": self.mtime, 970 "chksum": self.chksum, 971 "type": self.type, 972 "linkname": self.linkname, 973 "uname": self.uname, 974 "gname": self.gname, 975 "devmajor": self.devmajor, 976 "devminor": self.devminor 977 } 978 979 if info["type"] == DIRTYPE and not info["name"].endswith("/"): 980 info["name"] += "/" 981 982 for key in ("name", "linkname", "uname", "gname"): 983 if type(info[key]) is unicode: 984 info[key] = info[key].encode(encoding, errors) 985 986 return info 987 988 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): 989 """Return a tar header as a string of 512 byte blocks. 990 """ 991 info = self.get_info(encoding, errors) 992 993 if format == USTAR_FORMAT: 994 return self.create_ustar_header(info) 995 elif format == GNU_FORMAT: 996 return self.create_gnu_header(info) 997 elif format == PAX_FORMAT: 998 return self.create_pax_header(info, encoding, errors) 999 else: 1000 raise ValueError("invalid format") 1001 1002 def create_ustar_header(self, info): 1003 """Return the object as a ustar header block. 1004 """ 1005 info["magic"] = POSIX_MAGIC 1006 1007 if len(info["linkname"]) > LENGTH_LINK: 1008 raise ValueError("linkname is too long") 1009 1010 if len(info["name"]) > LENGTH_NAME: 1011 info["prefix"], info["name"] = self._posix_split_name(info["name"]) 1012 1013 return self._create_header(info, USTAR_FORMAT) 1014 1015 def create_gnu_header(self, info): 1016 """Return the object as a GNU header block sequence. 1017 """ 1018 info["magic"] = GNU_MAGIC 1019 1020 buf = "" 1021 if len(info["linkname"]) > LENGTH_LINK: 1022 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK) 1023 1024 if len(info["name"]) > LENGTH_NAME: 1025 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME) 1026 1027 return buf + self._create_header(info, GNU_FORMAT) 1028 1029 def create_pax_header(self, info, encoding, errors): 1030 """Return the object as a ustar header block. If it cannot be 1031 represented this way, prepend a pax extended header sequence 1032 with supplement information. 1033 """ 1034 info["magic"] = POSIX_MAGIC 1035 pax_headers = self.pax_headers.copy() 1036 1037 # Test string fields for values that exceed the field length or cannot 1038 # be represented in ASCII encoding. 1039 for name, hname, length in ( 1040 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), 1041 ("uname", "uname", 32), ("gname", "gname", 32)): 1042 1043 if hname in pax_headers: 1044 # The pax header has priority. 1045 continue 1046 1047 val = info[name].decode(encoding, errors) 1048 1049 # Try to encode the string as ASCII. 1050 try: 1051 val.encode("ascii") 1052 except UnicodeEncodeError: 1053 pax_headers[hname] = val 1054 continue 1055 1056 if len(info[name]) > length: 1057 pax_headers[hname] = val 1058 1059 # Test number fields for values that exceed the field limit or values 1060 # that like to be stored as float. 1061 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): 1062 if name in pax_headers: 1063 # The pax header has priority. Avoid overflow. 1064 info[name] = 0 1065 continue 1066 1067 val = info[name] 1068 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): 1069 pax_headers[name] = unicode(val) 1070 info[name] = 0 1071 1072 # Create a pax extended header if necessary. 1073 if pax_headers: 1074 buf = self._create_pax_generic_header(pax_headers) 1075 else: 1076 buf = "" 1077 1078 return buf + self._create_header(info, USTAR_FORMAT) 1079 1080 @classmethod 1081 def create_pax_global_header(cls, pax_headers): 1082 """Return the object as a pax global header block sequence. 1083 """ 1084 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE) 1085 1086 def _posix_split_name(self, name): 1087 """Split a name longer than 100 chars into a prefix 1088 and a name part. 1089 """ 1090 prefix = name[:LENGTH_PREFIX + 1] 1091 while prefix and prefix[-1] != "/": 1092 prefix = prefix[:-1] 1093 1094 name = name[len(prefix):] 1095 prefix = prefix[:-1] 1096 1097 if not prefix or len(name) > LENGTH_NAME: 1098 raise ValueError("name is too long") 1099 return prefix, name 1100 1101 @staticmethod 1102 def _create_header(info, format): 1103 """Return a header block. info is a dictionary with file 1104 information, format must be one of the *_FORMAT constants. 1105 """ 1106 parts = [ 1107 stn(info.get("name", ""), 100), 1108 itn(info.get("mode", 0) & 07777, 8, format), 1109 itn(info.get("uid", 0), 8, format), 1110 itn(info.get("gid", 0), 8, format), 1111 itn(info.get("size", 0), 12, format), 1112 itn(info.get("mtime", 0), 12, format), 1113 " ", # checksum field 1114 info.get("type", REGTYPE), 1115 stn(info.get("linkname", ""), 100), 1116 stn(info.get("magic", POSIX_MAGIC), 8), 1117 stn(info.get("uname", ""), 32), 1118 stn(info.get("gname", ""), 32), 1119 itn(info.get("devmajor", 0), 8, format), 1120 itn(info.get("devminor", 0), 8, format), 1121 stn(info.get("prefix", ""), 155) 1122 ] 1123 1124 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts)) 1125 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] 1126 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] 1127 return buf 1128 1129 @staticmethod 1130 def _create_payload(payload): 1131 """Return the string payload filled with zero bytes 1132 up to the next 512 byte border. 1133 """ 1134 blocks, remainder = divmod(len(payload), BLOCKSIZE) 1135 if remainder > 0: 1136 payload += (BLOCKSIZE - remainder) * NUL 1137 return payload 1138 1139 @classmethod 1140 def _create_gnu_long_header(cls, name, type): 1141 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence 1142 for name. 1143 """ 1144 name += NUL 1145 1146 info = {} 1147 info["name"] = "././@LongLink" 1148 info["type"] = type 1149 info["size"] = len(name) 1150 info["magic"] = GNU_MAGIC 1151 1152 # create extended header + name blocks. 1153 return cls._create_header(info, USTAR_FORMAT) + \ 1154 cls._create_payload(name) 1155 1156 @classmethod 1157 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE): 1158 """Return a POSIX.1-2001 extended or global header sequence 1159 that contains a list of keyword, value pairs. The values 1160 must be unicode objects. 1161 """ 1162 records = [] 1163 for keyword, value in pax_headers.iteritems(): 1164 keyword = keyword.encode("utf8") 1165 value = value.encode("utf8") 1166 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' 1167 n = p = 0 1168 while True: 1169 n = l + len(str(p)) 1170 if n == p: 1171 break 1172 p = n 1173 records.append("%d %s=%s\n" % (p, keyword, value)) 1174 records = "".join(records) 1175 1176 # We use a hardcoded "././@PaxHeader" name like star does 1177 # instead of the one that POSIX recommends. 1178 info = {} 1179 info["name"] = "././@PaxHeader" 1180 info["type"] = type 1181 info["size"] = len(records) 1182 info["magic"] = POSIX_MAGIC 1183 1184 # Create pax header + record blocks. 1185 return cls._create_header(info, USTAR_FORMAT) + \ 1186 cls._create_payload(records) 1187 1188 @classmethod 1189 def frombuf(cls, buf): 1190 """Construct a TarInfo object from a 512 byte string buffer. 1191 """ 1192 if len(buf) == 0: 1193 raise EmptyHeaderError("empty header") 1194 if len(buf) != BLOCKSIZE: 1195 raise TruncatedHeaderError("truncated header") 1196 if buf.count(NUL) == BLOCKSIZE: 1197 raise EOFHeaderError("end of file header") 1198 1199 chksum = nti(buf[148:156]) 1200 if chksum not in calc_chksums(buf): 1201 raise InvalidHeaderError("bad checksum") 1202 1203 obj = cls() 1204 obj.buf = buf 1205 obj.name = nts(buf[0:100]) 1206 obj.mode = nti(buf[100:108]) 1207 obj.uid = nti(buf[108:116]) 1208 obj.gid = nti(buf[116:124]) 1209 obj.size = nti(buf[124:136]) 1210 obj.mtime = nti(buf[136:148]) 1211 obj.chksum = chksum 1212 obj.type = buf[156:157] 1213 obj.linkname = nts(buf[157:257]) 1214 obj.uname = nts(buf[265:297]) 1215 obj.gname = nts(buf[297:329]) 1216 obj.devmajor = nti(buf[329:337]) 1217 obj.devminor = nti(buf[337:345]) 1218 prefix = nts(buf[345:500]) 1219 1220 # Old V7 tar format represents a directory as a regular 1221 # file with a trailing slash. 1222 if obj.type == AREGTYPE and obj.name.endswith("/"): 1223 obj.type = DIRTYPE 1224 1225 # Remove redundant slashes from directories. 1226 if obj.isdir(): 1227 obj.name = obj.name.rstrip("/") 1228 1229 # Reconstruct a ustar longname. 1230 if prefix and obj.type not in GNU_TYPES: 1231 obj.name = prefix + "/" + obj.name 1232 return obj 1233 1234 @classmethod 1235 def fromtarfile(cls, tarfile): 1236 """Return the next TarInfo object from TarFile object 1237 tarfile. 1238 """ 1239 buf = tarfile.fileobj.read(BLOCKSIZE) 1240 obj = cls.frombuf(buf) 1241 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE 1242 return obj._proc_member(tarfile) 1243 1244 #-------------------------------------------------------------------------- 1245 # The following are methods that are called depending on the type of a 1246 # member. The entry point is _proc_member() which can be overridden in a 1247 # subclass to add custom _proc_*() methods. A _proc_*() method MUST 1248 # implement the following 1249 # operations: 1250 # 1. Set self.offset_data to the position where the data blocks begin, 1251 # if there is data that follows. 1252 # 2. Set tarfile.offset to the position where the next member's header will 1253 # begin. 1254 # 3. Return self or another valid TarInfo object. 1255 def _proc_member(self, tarfile): 1256 """Choose the right processing method depending on 1257 the type and call it. 1258 """ 1259 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): 1260 return self._proc_gnulong(tarfile) 1261 elif self.type == GNUTYPE_SPARSE: 1262 return self._proc_sparse(tarfile) 1263 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): 1264 return self._proc_pax(tarfile) 1265 else: 1266 return self._proc_builtin(tarfile) 1267 1268 def _proc_builtin(self, tarfile): 1269 """Process a builtin type or an unknown type which 1270 will be treated as a regular file. 1271 """ 1272 self.offset_data = tarfile.fileobj.tell() 1273 offset = self.offset_data 1274 if self.isreg() or self.type not in SUPPORTED_TYPES: 1275 # Skip the following data blocks. 1276 offset += self._block(self.size) 1277 tarfile.offset = offset 1278 1279 # Patch the TarInfo object with saved global 1280 # header information. 1281 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) 1282 1283 return self 1284 1285 def _proc_gnulong(self, tarfile): 1286 """Process the blocks that hold a GNU longname 1287 or longlink member. 1288 """ 1289 buf = tarfile.fileobj.read(self._block(self.size)) 1290 1291 # Fetch the next header and process it. 1292 try: 1293 next = self.fromtarfile(tarfile) 1294 except HeaderError: 1295 raise SubsequentHeaderError("missing or bad subsequent header") 1296 1297 # Patch the TarInfo object from the next header with 1298 # the longname information. 1299 next.offset = self.offset 1300 if self.type == GNUTYPE_LONGNAME: 1301 next.name = nts(buf) 1302 elif self.type == GNUTYPE_LONGLINK: 1303 next.linkname = nts(buf) 1304 1305 return next 1306 1307 def _proc_sparse(self, tarfile): 1308 """Process a GNU sparse header plus extra headers. 1309 """ 1310 buf = self.buf 1311 sp = _ringbuffer() 1312 pos = 386 1313 lastpos = 0L 1314 realpos = 0L 1315 # There are 4 possible sparse structs in the 1316 # first header. 1317 for i in xrange(4): 1318 try: 1319 offset = nti(buf[pos:pos + 12]) 1320 numbytes = nti(buf[pos + 12:pos + 24]) 1321 except ValueError: 1322 break 1323 if offset > lastpos: 1324 sp.append(_hole(lastpos, offset - lastpos)) 1325 sp.append(_data(offset, numbytes, realpos)) 1326 realpos += numbytes 1327 lastpos = offset + numbytes 1328 pos += 24 1329 1330 isextended = ord(buf[482]) 1331 origsize = nti(buf[483:495]) 1332 1333 # If the isextended flag is given, 1334 # there are extra headers to process. 1335 while isextended == 1: 1336 buf = tarfile.fileobj.read(BLOCKSIZE) 1337 pos = 0 1338 for i in xrange(21): 1339 try: 1340 offset = nti(buf[pos:pos + 12]) 1341 numbytes = nti(buf[pos + 12:pos + 24]) 1342 except ValueError: 1343 break 1344 if offset > lastpos: 1345 sp.append(_hole(lastpos, offset - lastpos)) 1346 sp.append(_data(offset, numbytes, realpos)) 1347 realpos += numbytes 1348 lastpos = offset + numbytes 1349 pos += 24 1350 isextended = ord(buf[504]) 1351 1352 if lastpos < origsize: 1353 sp.append(_hole(lastpos, origsize - lastpos)) 1354 1355 self.sparse = sp 1356 1357 self.offset_data = tarfile.fileobj.tell() 1358 tarfile.offset = self.offset_data + self._block(self.size) 1359 self.size = origsize 1360 1361 return self 1362 1363 def _proc_pax(self, tarfile): 1364 """Process an extended or global header as described in 1365 POSIX.1-2001. 1366 """ 1367 # Read the header information. 1368 buf = tarfile.fileobj.read(self._block(self.size)) 1369 1370 # A pax header stores supplemental information for either 1371 # the following file (extended) or all following files 1372 # (global). 1373 if self.type == XGLTYPE: 1374 pax_headers = tarfile.pax_headers 1375 else: 1376 pax_headers = tarfile.pax_headers.copy() 1377 1378 # Parse pax header information. A record looks like that: 1379 # "%d %s=%s\n" % (length, keyword, value). length is the size 1380 # of the complete record including the length field itself and 1381 # the newline. keyword and value are both UTF-8 encoded strings. 1382 regex = re.compile(r"(\d+) ([^=]+)=", re.U) 1383 pos = 0 1384 while True: 1385 match = regex.match(buf, pos) 1386 if not match: 1387 break 1388 1389 length, keyword = match.groups() 1390 length = int(length) 1391 value = buf[match.end(2) + 1:match.start(1) + length - 1] 1392 1393 keyword = keyword.decode("utf8") 1394 value = value.decode("utf8") 1395 1396 pax_headers[keyword] = value 1397 pos += length 1398 1399 # Fetch the next header. 1400 try: 1401 next = self.fromtarfile(tarfile) 1402 except HeaderError: 1403 raise SubsequentHeaderError("missing or bad subsequent header") 1404 1405 if self.type in (XHDTYPE, SOLARIS_XHDTYPE): 1406 # Patch the TarInfo object with the extended header info. 1407 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) 1408 next.offset = self.offset 1409 1410 if "size" in pax_headers: 1411 # If the extended header replaces the size field, 1412 # we need to recalculate the offset where the next 1413 # header starts. 1414 offset = next.offset_data 1415 if next.isreg() or next.type not in SUPPORTED_TYPES: 1416 offset += next._block(next.size) 1417 tarfile.offset = offset 1418 1419 return next 1420 1421 def _apply_pax_info(self, pax_headers, encoding, errors): 1422 """Replace fields with supplemental information from a previous 1423 pax extended or global header. 1424 """ 1425 for keyword, value in pax_headers.iteritems(): 1426 if keyword not in PAX_FIELDS: 1427 continue 1428 1429 if keyword == "path": 1430 value = value.rstrip("/") 1431 1432 if keyword in PAX_NUMBER_FIELDS: 1433 try: 1434 value = PAX_NUMBER_FIELDS[keyword](value) 1435 except ValueError: 1436 value = 0 1437 else: 1438 value = uts(value, encoding, errors) 1439 1440 setattr(self, keyword, value) 1441 1442 self.pax_headers = pax_headers.copy() 1443 1444 def _block(self, count): 1445 """Round up a byte count by BLOCKSIZE and return it, 1446 e.g. _block(834) => 1024. 1447 """ 1448 blocks, remainder = divmod(count, BLOCKSIZE) 1449 if remainder: 1450 blocks += 1 1451 return blocks * BLOCKSIZE 1452 1453 def isreg(self): 1454 return self.type in REGULAR_TYPES 1455 def isfile(self): 1456 return self.isreg() 1457 def isdir(self): 1458 return self.type == DIRTYPE 1459 def issym(self): 1460 return self.type == SYMTYPE 1461 def islnk(self): 1462 return self.type == LNKTYPE 1463 def ischr(self): 1464 return self.type == CHRTYPE 1465 def isblk(self): 1466 return self.type == BLKTYPE 1467 def isfifo(self): 1468 return self.type == FIFOTYPE 1469 def issparse(self): 1470 return self.type == GNUTYPE_SPARSE 1471 def isdev(self): 1472 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) 1473 # class TarInfo 1474 1475 class TarFile(object): 1476 """The TarFile Class provides an interface to tar archives. 1477 """ 1478 1479 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) 1480 1481 dereference = False # If true, add content of linked file to the 1482 # tar file, else the link. 1483 1484 ignore_zeros = False # If true, skips empty or invalid blocks and 1485 # continues processing. 1486 1487 errorlevel = 1 # If 0, fatal errors only appear in debug 1488 # messages (if debug >= 0). If > 0, errors 1489 # are passed to the caller as exceptions. 1490 1491 format = DEFAULT_FORMAT # The format to use when creating an archive. 1492 1493 encoding = ENCODING # Encoding for 8-bit character strings. 1494 1495 errors = None # Error handler for unicode conversion. 1496 1497 tarinfo = TarInfo # The default TarInfo class to use. 1498 1499 fileobject = ExFileObject # The default ExFileObject class to use. 1500 1501 def __init__(self, name=None, mode="r", fileobj=None, format=None, 1502 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, 1503 errors=None, pax_headers=None, debug=None, errorlevel=None): 1504 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to 1505 read from an existing archive, 'a' to append data to an existing 1506 file or 'w' to create a new file overwriting an existing one. `mode' 1507 defaults to 'r'. 1508 If `fileobj' is given, it is used for reading or writing data. If it 1509 can be determined, `mode' is overridden by `fileobj's mode. 1510 `fileobj' is not closed, when TarFile is closed. 1511 """ 1512 if len(mode) > 1 or mode not in "raw": 1513 raise ValueError("mode must be 'r', 'a' or 'w'") 1514 self.mode = mode 1515 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] 1516 1517 if not fileobj: 1518 if self.mode == "a" and not os.path.exists(name): 1519 # Create nonexistent files in append mode. 1520 self.mode = "w" 1521 self._mode = "wb" 1522 fileobj = bltn_open(name, self._mode) 1523 self._extfileobj = False 1524 else: 1525 if name is None and hasattr(fileobj, "name"): 1526 name = fileobj.name 1527 if hasattr(fileobj, "mode"): 1528 self._mode = fileobj.mode 1529 self._extfileobj = True 1530 self.name = os.path.abspath(name) if name else None 1531 self.fileobj = fileobj 1532 1533 # Init attributes. 1534 if format is not None: 1535 self.format = format 1536 if tarinfo is not None: 1537 self.tarinfo = tarinfo 1538 if dereference is not None: 1539 self.dereference = dereference 1540 if ignore_zeros is not None: 1541 self.ignore_zeros = ignore_zeros 1542 if encoding is not None: 1543 self.encoding = encoding 1544 1545 if errors is not None: 1546 self.errors = errors 1547 elif mode == "r": 1548 self.errors = "utf-8" 1549 else: 1550 self.errors = "strict" 1551 1552 if pax_headers is not None and self.format == PAX_FORMAT: 1553 self.pax_headers = pax_headers 1554 else: 1555 self.pax_headers = {} 1556 1557 if debug is not None: 1558 self.debug = debug 1559 if errorlevel is not None: 1560 self.errorlevel = errorlevel 1561 1562 # Init datastructures. 1563 self.closed = False 1564 self.members = [] # list of members as TarInfo objects 1565 self._loaded = False # flag if all members have been read 1566 self.offset = self.fileobj.tell() 1567 # current position in the archive file 1568 self.inodes = {} # dictionary caching the inodes of 1569 # archive members already added 1570 1571 try: 1572 if self.mode == "r": 1573 self.firstmember = None 1574 self.firstmember = self.next() 1575 1576 if self.mode == "a": 1577 # Move to the end of the archive, 1578 # before the first empty block. 1579 while True: 1580 self.fileobj.seek(self.offset) 1581 try: 1582 tarinfo = self.tarinfo.fromtarfile(self) 1583 self.members.append(tarinfo) 1584 except EOFHeaderError: 1585 self.fileobj.seek(self.offset) 1586 break 1587 except HeaderError, e: 1588 raise ReadError(str(e)) 1589 1590 if self.mode in "aw": 1591 self._loaded = True 1592 1593 if self.pax_headers: 1594 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) 1595 self.fileobj.write(buf) 1596 self.offset += len(buf) 1597 except: 1598 if not self._extfileobj: 1599 self.fileobj.close() 1600 self.closed = True 1601 raise 1602 1603 def _getposix(self): 1604 return self.format == USTAR_FORMAT 1605 def _setposix(self, value): 1606 import warnings 1607 warnings.warn("use the format attribute instead", DeprecationWarning, 1608 2) 1609 if value: 1610 self.format = USTAR_FORMAT 1611 else: 1612 self.format = GNU_FORMAT 1613 posix = property(_getposix, _setposix) 1614 1615 #-------------------------------------------------------------------------- 1616 # Below are the classmethods which act as alternate constructors to the 1617 # TarFile class. The open() method is the only one that is needed for 1618 # public use; it is the "super"-constructor and is able to select an 1619 # adequate "sub"-constructor for a particular compression using the mapping 1620 # from OPEN_METH. 1621 # 1622 # This concept allows one to subclass TarFile without losing the comfort of 1623 # the super-constructor. A sub-constructor is registered and made available 1624 # by adding it to the mapping in OPEN_METH. 1625 1626 @classmethod 1627 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): 1628 """Open a tar archive for reading, writing or appending. Return 1629 an appropriate TarFile class. 1630 1631 mode: 1632 'r' or 'r:*' open for reading with transparent compression 1633 'r:' open for reading exclusively uncompressed 1634 'r:gz' open for reading with gzip compression 1635 'r:bz2' open for reading with bzip2 compression 1636 'a' or 'a:' open for appending, creating the file if necessary 1637 'w' or 'w:' open for writing without compression 1638 'w:gz' open for writing with gzip compression 1639 'w:bz2' open for writing with bzip2 compression 1640 1641 'r|*' open a stream of tar blocks with transparent compression 1642 'r|' open an uncompressed stream of tar blocks for reading 1643 'r|gz' open a gzip compressed stream of tar blocks 1644 'r|bz2' open a bzip2 compressed stream of tar blocks 1645 'w|' open an uncompressed stream for writing 1646 'w|gz' open a gzip compressed stream for writing 1647 'w|bz2' open a bzip2 compressed stream for writing 1648 """ 1649 1650 if not name and not fileobj: 1651 raise ValueError("nothing to open") 1652 1653 if mode in ("r", "r:*"): 1654 # Find out which *open() is appropriate for opening the file. 1655 for comptype in cls.OPEN_METH: 1656 func = getattr(cls, cls.OPEN_METH[comptype]) 1657 if fileobj is not None: 1658 saved_pos = fileobj.tell() 1659 try: 1660 return func(name, "r", fileobj, **kwargs) 1661 except (ReadError, CompressionError), e: 1662 if fileobj is not None: 1663 fileobj.seek(saved_pos) 1664 continue 1665 raise ReadError("file could not be opened successfully") 1666 1667 elif ":" in mode: 1668 filemode, comptype = mode.split(":", 1) 1669 filemode = filemode or "r" 1670 comptype = comptype or "tar" 1671 1672 # Select the *open() function according to 1673 # given compression. 1674 if comptype in cls.OPEN_METH: 1675 func = getattr(cls, cls.OPEN_METH[comptype]) 1676 else: 1677 raise CompressionError("unknown compression type %r" % comptype) 1678 return func(name, filemode, fileobj, **kwargs) 1679 1680 elif "|" in mode: 1681 filemode, comptype = mode.split("|", 1) 1682 filemode = filemode or "r" 1683 comptype = comptype or "tar" 1684 1685 if filemode not in "rw": 1686 raise ValueError("mode must be 'r' or 'w'") 1687 1688 t = cls(name, filemode, 1689 _Stream(name, filemode, comptype, fileobj, bufsize), 1690 **kwargs) 1691 t._extfileobj = False 1692 return t 1693 1694 elif mode in "aw": 1695 return cls.taropen(name, mode, fileobj, **kwargs) 1696 1697 raise ValueError("undiscernible mode") 1698 1699 @classmethod 1700 def taropen(cls, name, mode="r", fileobj=None, **kwargs): 1701 """Open uncompressed tar archive name for reading or writing. 1702 """ 1703 if len(mode) > 1 or mode not in "raw": 1704 raise ValueError("mode must be 'r', 'a' or 'w'") 1705 return cls(name, mode, fileobj, **kwargs) 1706 1707 @classmethod 1708 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 1709 """Open gzip compressed tar archive name for reading or writing. 1710 Appending is not allowed. 1711 """ 1712 if len(mode) > 1 or mode not in "rw": 1713 raise ValueError("mode must be 'r' or 'w'") 1714 1715 try: 1716 import gzip 1717 gzip.GzipFile 1718 except (ImportError, AttributeError): 1719 raise CompressionError("gzip module is not available") 1720 1721 if fileobj is None: 1722 fileobj = bltn_open(name, mode + "b") 1723 1724 try: 1725 t = cls.taropen(name, mode, 1726 gzip.GzipFile(name, mode, compresslevel, fileobj), 1727 **kwargs) 1728 except IOError: 1729 raise ReadError("not a gzip file") 1730 t._extfileobj = False 1731 return t 1732 1733 @classmethod 1734 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 1735 """Open bzip2 compressed tar archive name for reading or writing. 1736 Appending is not allowed. 1737 """ 1738 if len(mode) > 1 or mode not in "rw": 1739 raise ValueError("mode must be 'r' or 'w'.") 1740 1741 try: 1742 import bz2 1743 except ImportError: 1744 raise CompressionError("bz2 module is not available") 1745 1746 if fileobj is not None: 1747 fileobj = _BZ2Proxy(fileobj, mode) 1748 else: 1749 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) 1750 1751 try: 1752 t = cls.taropen(name, mode, fileobj, **kwargs) 1753 except (IOError, EOFError): 1754 raise ReadError("not a bzip2 file") 1755 t._extfileobj = False 1756 return t 1757 1758 # All *open() methods are registered here. 1759 OPEN_METH = { 1760 "tar": "taropen", # uncompressed tar 1761 "gz": "gzopen", # gzip compressed tar 1762 "bz2": "bz2open" # bzip2 compressed tar 1763 } 1764 1765 #-------------------------------------------------------------------------- 1766 # The public methods which TarFile provides: 1767 1768 def close(self): 1769 """Close the TarFile. In write-mode, two finishing zero blocks are 1770 appended to the archive. 1771 """ 1772 if self.closed: 1773 return 1774 1775 if self.mode in "aw": 1776 self.fileobj.write(NUL * (BLOCKSIZE * 2)) 1777 self.offset += (BLOCKSIZE * 2) 1778 # fill up the end with zero-blocks 1779 # (like option -b20 for tar does) 1780 blocks, remainder = divmod(self.offset, RECORDSIZE) 1781 if remainder > 0: 1782 self.fileobj.write(NUL * (RECORDSIZE - remainder)) 1783 1784 if not self._extfileobj: 1785 self.fileobj.close() 1786 self.closed = True 1787 1788 def getmember(self, name): 1789 """Return a TarInfo object for member `name'. If `name' can not be 1790 found in the archive, KeyError is raised. If a member occurs more 1791 than once in the archive, its last occurrence is assumed to be the 1792 most up-to-date version. 1793 """ 1794 tarinfo = self._getmember(name) 1795 if tarinfo is None: 1796 raise KeyError("filename %r not found" % name) 1797 return tarinfo 1798 1799 def getmembers(self): 1800 """Return the members of the archive as a list of TarInfo objects. The 1801 list has the same order as the members in the archive. 1802 """ 1803 self._check() 1804 if not self._loaded: # if we want to obtain a list of 1805 self._load() # all members, we first have to 1806 # scan the whole archive. 1807 return self.members 1808 1809 def getnames(self): 1810 """Return the members of the archive as a list of their names. It has 1811 the same order as the list returned by getmembers(). 1812 """ 1813 return [tarinfo.name for tarinfo in self.getmembers()] 1814 1815 def gettarinfo(self, name=None, arcname=None, fileobj=None): 1816 """Create a TarInfo object for either the file `name' or the file 1817 object `fileobj' (using os.fstat on its file descriptor). You can 1818 modify some of the TarInfo's attributes before you add it using 1819 addfile(). If given, `arcname' specifies an alternative name for the 1820 file in the archive. 1821 """ 1822 self._check("aw") 1823 1824 # When fileobj is given, replace name by 1825 # fileobj's real name. 1826 if fileobj is not None: 1827 name = fileobj.name 1828 1829 # Building the name of the member in the archive. 1830 # Backward slashes are converted to forward slashes, 1831 # Absolute paths are turned to relative paths. 1832 if arcname is None: 1833 arcname = name 1834 drv, arcname = os.path.splitdrive(arcname) 1835 arcname = arcname.replace(os.sep, "/") 1836 arcname = arcname.lstrip("/") 1837 1838 # Now, fill the TarInfo object with 1839 # information specific for the file. 1840 tarinfo = self.tarinfo() 1841 tarinfo.tarfile = self 1842 1843 # Use os.stat or os.lstat, depending on platform 1844 # and if symlinks shall be resolved. 1845 if fileobj is None: 1846 if hasattr(os, "lstat") and not self.dereference: 1847 statres = os.lstat(name) 1848 else: 1849 statres = os.stat(name) 1850 else: 1851 statres = os.fstat(fileobj.fileno()) 1852 linkname = "" 1853 1854 stmd = statres.st_mode 1855 if stat.S_ISREG(stmd): 1856 inode = (statres.st_ino, statres.st_dev) 1857 if not self.dereference and statres.st_nlink > 1 and \ 1858 inode in self.inodes and arcname != self.inodes[inode]: 1859 # Is it a hardlink to an already 1860 # archived file? 1861 type = LNKTYPE 1862 linkname = self.inodes[inode] 1863 else: 1864 # The inode is added only if its valid. 1865 # For win32 it is always 0. 1866 type = REGTYPE 1867 if inode[0]: 1868 self.inodes[inode] = arcname 1869 elif stat.S_ISDIR(stmd): 1870 type = DIRTYPE 1871 elif stat.S_ISFIFO(stmd): 1872 type = FIFOTYPE 1873 elif stat.S_ISLNK(stmd): 1874 type = SYMTYPE 1875 linkname = os.readlink(name) 1876 elif stat.S_ISCHR(stmd): 1877 type = CHRTYPE 1878 elif stat.S_ISBLK(stmd): 1879 type = BLKTYPE 1880 else: 1881 return None 1882 1883 # Fill the TarInfo object with all 1884 # information we can get. 1885 tarinfo.name = arcname 1886 tarinfo.mode = stmd 1887 tarinfo.uid = statres.st_uid 1888 tarinfo.gid = statres.st_gid 1889 if type == REGTYPE: 1890 tarinfo.size = statres.st_size 1891 else: 1892 tarinfo.size = 0L 1893 tarinfo.mtime = statres.st_mtime 1894 tarinfo.type = type 1895 tarinfo.linkname = linkname 1896 if pwd: 1897 try: 1898 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] 1899 except KeyError: 1900 pass 1901 if grp: 1902 try: 1903 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] 1904 except KeyError: 1905 pass 1906 1907 if type in (CHRTYPE, BLKTYPE): 1908 if hasattr(os, "major") and hasattr(os, "minor"): 1909 tarinfo.devmajor = os.major(statres.st_rdev) 1910 tarinfo.devminor = os.minor(statres.st_rdev) 1911 return tarinfo 1912 1913 def list(self, verbose=True): 1914 """Print a table of contents to sys.stdout. If `verbose' is False, only 1915 the names of the members are printed. If it is True, an `ls -l'-like 1916 output is produced. 1917 """ 1918 self._check() 1919 1920 for tarinfo in self: 1921 if verbose: 1922 print filemode(tarinfo.mode), 1923 print "%s/%s" % (tarinfo.uname or tarinfo.uid, 1924 tarinfo.gname or tarinfo.gid), 1925 if tarinfo.ischr() or tarinfo.isblk(): 1926 print "%10s" % ("%d,%d" \ 1927 % (tarinfo.devmajor, tarinfo.devminor)), 1928 else: 1929 print "%10d" % tarinfo.size, 1930 print "%d-%02d-%02d %02d:%02d:%02d" \ 1931 % time.localtime(tarinfo.mtime)[:6], 1932 1933 print tarinfo.name + ("/" if tarinfo.isdir() else ""), 1934 1935 if verbose: 1936 if tarinfo.issym(): 1937 print "->", tarinfo.linkname, 1938 if tarinfo.islnk(): 1939 print "link to", tarinfo.linkname, 1940 print 1941 1942 def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): 1943 """Add the file `name' to the archive. `name' may be any type of file 1944 (directory, fifo, symbolic link, etc.). If given, `arcname' 1945 specifies an alternative name for the file in the archive. 1946 Directories are added recursively by default. This can be avoided by 1947 setting `recursive' to False. `exclude' is a function that should 1948 return True for each filename to be excluded. `filter' is a function 1949 that expects a TarInfo object argument and returns the changed 1950 TarInfo object, if it returns None the TarInfo object will be 1951 excluded from the archive. 1952 """ 1953 self._check("aw") 1954 1955 if arcname is None: 1956 arcname = name 1957 1958 # Exclude pathnames. 1959 if exclude is not None: 1960 import warnings 1961 warnings.warn("use the filter argument instead", 1962 DeprecationWarning, 2) 1963 if exclude(name): 1964 self._dbg(2, "tarfile: Excluded %r" % name) 1965 return 1966 1967 # Skip if somebody tries to archive the archive... 1968 if self.name is not None and os.path.abspath(name) == self.name: 1969 self._dbg(2, "tarfile: Skipped %r" % name) 1970 return 1971 1972 self._dbg(1, name) 1973 1974 # Create a TarInfo object from the file. 1975 tarinfo = self.gettarinfo(name, arcname) 1976 1977 if tarinfo is None: 1978 self._dbg(1, "tarfile: Unsupported type %r" % name) 1979 return 1980 1981 # Change or exclude the TarInfo object. 1982 if filter is not None: 1983 tarinfo = filter(tarinfo) 1984 if tarinfo is None: 1985 self._dbg(2, "tarfile: Excluded %r" % name) 1986 return 1987 1988 # Append the tar header and data to the archive. 1989 if tarinfo.isreg(): 1990 with bltn_open(name, "rb") as f: 1991 self.addfile(tarinfo, f) 1992 1993 elif tarinfo.isdir(): 1994 self.addfile(tarinfo) 1995 if recursive: 1996 for f in os.listdir(name): 1997 self.add(os.path.join(name, f), os.path.join(arcname, f), 1998 recursive, exclude, filter) 1999 2000 else: 2001 self.addfile(tarinfo) 2002 2003 def addfile(self, tarinfo, fileobj=None): 2004 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 2005 given, tarinfo.size bytes are read from it and added to the archive. 2006 You can create TarInfo objects using gettarinfo(). 2007 On Windows platforms, `fileobj' should always be opened with mode 2008 'rb' to avoid irritation about the file size. 2009 """ 2010 self._check("aw") 2011 2012 tarinfo = copy.copy(tarinfo) 2013 2014 buf = tarinfo.tobuf(self.format, self.encoding, self.errors) 2015 self.fileobj.write(buf) 2016 self.offset += len(buf) 2017 2018 # If there's data to follow, append it. 2019 if fileobj is not None: 2020 copyfileobj(fileobj, self.fileobj, tarinfo.size) 2021 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 2022 if remainder > 0: 2023 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 2024 blocks += 1 2025 self.offset += blocks * BLOCKSIZE 2026 2027 self.members.append(tarinfo) 2028 2029 def extractall(self, path=".", members=None): 2030 """Extract all members from the archive to the current working 2031 directory and set owner, modification time and permissions on 2032 directories afterwards. `path' specifies a different directory 2033 to extract to. `members' is optional and must be a subset of the 2034 list returned by getmembers(). 2035 """ 2036 directories = [] 2037 2038 if members is None: 2039 members = self 2040 2041 for tarinfo in members: 2042 if tarinfo.isdir(): 2043 # Extract directories with a safe mode. 2044 directories.append(tarinfo) 2045 tarinfo = copy.copy(tarinfo) 2046 tarinfo.mode = 0700 2047 self.extract(tarinfo, path) 2048 2049 # Reverse sort directories. 2050 directories.sort(key=operator.attrgetter('name')) 2051 directories.reverse() 2052 2053 # Set correct owner, mtime and filemode on directories. 2054 for tarinfo in directories: 2055 dirpath = os.path.join(path, tarinfo.name) 2056 try: 2057 self.chown(tarinfo, dirpath) 2058 self.utime(tarinfo, dirpath) 2059 self.chmod(tarinfo, dirpath) 2060 except ExtractError, e: 2061 if self.errorlevel > 1: 2062 raise 2063 else: 2064 self._dbg(1, "tarfile: %s" % e) 2065 2066 def extract(self, member, path=""): 2067 """Extract a member from the archive to the current working directory, 2068 using its full name. Its file information is extracted as accurately 2069 as possible. `member' may be a filename or a TarInfo object. You can 2070 specify a different directory using `path'. 2071 """ 2072 self._check("r") 2073 2074 if isinstance(member, basestring): 2075 tarinfo = self.getmember(member) 2076 else: 2077 tarinfo = member 2078 2079 # Prepare the link target for makelink(). 2080 if tarinfo.islnk(): 2081 tarinfo._link_target = os.path.join(path, tarinfo.linkname) 2082 2083 try: 2084 self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) 2085 except EnvironmentError, e: 2086 if self.errorlevel > 0: 2087 raise 2088 else: 2089 if e.filename is None: 2090 self._dbg(1, "tarfile: %s" % e.strerror) 2091 else: 2092 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 2093 except ExtractError, e: 2094 if self.errorlevel > 1: 2095 raise 2096 else: 2097 self._dbg(1, "tarfile: %s" % e) 2098 2099 def extractfile(self, member): 2100 """Extract a member from the archive as a file object. `member' may be 2101 a filename or a TarInfo object. If `member' is a regular file, a 2102 file-like object is returned. If `member' is a link, a file-like 2103 object is constructed from the link's target. If `member' is none of 2104 the above, None is returned. 2105 The file-like object is read-only and provides the following 2106 methods: read(), readline(), readlines(), seek() and tell() 2107 """ 2108 self._check("r") 2109 2110 if isinstance(member, basestring): 2111 tarinfo = self.getmember(member) 2112 else: 2113 tarinfo = member 2114 2115 if tarinfo.isreg(): 2116 return self.fileobject(self, tarinfo) 2117 2118 elif tarinfo.type not in SUPPORTED_TYPES: 2119 # If a member's type is unknown, it is treated as a 2120 # regular file. 2121 return self.fileobject(self, tarinfo) 2122 2123 elif tarinfo.islnk() or tarinfo.issym(): 2124 if isinstance(self.fileobj, _Stream): 2125 # A small but ugly workaround for the case that someone tries 2126 # to extract a (sym)link as a file-object from a non-seekable 2127 # stream of tar blocks. 2128 raise StreamError("cannot extract (sym)link as file object") 2129 else: 2130 # A (sym)link's file object is its target's file object. 2131 return self.extractfile(self._find_link_target(tarinfo)) 2132 else: 2133 # If there's no data associated with the member (directory, chrdev, 2134 # blkdev, etc.), return None instead of a file object. 2135 return None 2136 2137 def _extract_member(self, tarinfo, targetpath): 2138 """Extract the TarInfo object tarinfo to a physical 2139 file called targetpath. 2140 """ 2141 # Fetch the TarInfo object for the given name 2142 # and build the destination pathname, replacing 2143 # forward slashes to platform specific separators. 2144 targetpath = targetpath.rstrip("/") 2145 targetpath = targetpath.replace("/", os.sep) 2146 2147 # Create all upper directories. 2148 upperdirs = os.path.dirname(targetpath) 2149 if upperdirs and not os.path.exists(upperdirs): 2150 # Create directories that are not part of the archive with 2151 # default permissions. 2152 os.makedirs(upperdirs) 2153 2154 if tarinfo.islnk() or tarinfo.issym(): 2155 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 2156 else: 2157 self._dbg(1, tarinfo.name) 2158 2159 if tarinfo.isreg(): 2160 self.makefile(tarinfo, targetpath) 2161 elif tarinfo.isdir(): 2162 self.makedir(tarinfo, targetpath) 2163 elif tarinfo.isfifo(): 2164 self.makefifo(tarinfo, targetpath) 2165 elif tarinfo.ischr() or tarinfo.isblk(): 2166 self.makedev(tarinfo, targetpath) 2167 elif tarinfo.islnk() or tarinfo.issym(): 2168 self.makelink(tarinfo, targetpath) 2169 elif tarinfo.type not in SUPPORTED_TYPES: 2170 self.makeunknown(tarinfo, targetpath) 2171 else: 2172 self.makefile(tarinfo, targetpath) 2173 2174 self.chown(tarinfo, targetpath) 2175 if not tarinfo.issym(): 2176 self.chmod(tarinfo, targetpath) 2177 self.utime(tarinfo, targetpath) 2178 2179 #-------------------------------------------------------------------------- 2180 # Below are the different file methods. They are called via 2181 # _extract_member() when extract() is called. They can be replaced in a 2182 # subclass to implement other functionality. 2183 2184 def makedir(self, tarinfo, targetpath): 2185 """Make a directory called targetpath. 2186 """ 2187 try: 2188 # Use a safe mode for the directory, the real mode is set 2189 # later in _extract_member(). 2190 os.mkdir(targetpath, 0700) 2191 except EnvironmentError, e: 2192 if e.errno != errno.EEXIST: 2193 raise 2194 2195 def makefile(self, tarinfo, targetpath): 2196 """Make a file called targetpath. 2197 """ 2198 source = self.extractfile(tarinfo) 2199 try: 2200 with bltn_open(targetpath, "wb") as target: 2201 copyfileobj(source, target) 2202 finally: 2203 source.close() 2204 2205 def makeunknown(self, tarinfo, targetpath): 2206 """Make a file from a TarInfo object with an unknown type 2207 at targetpath. 2208 """ 2209 self.makefile(tarinfo, targetpath) 2210 self._dbg(1, "tarfile: Unknown file type %r, " \ 2211 "extracted as regular file." % tarinfo.type) 2212 2213 def makefifo(self, tarinfo, targetpath): 2214 """Make a fifo called targetpath. 2215 """ 2216 if hasattr(os, "mkfifo"): 2217 os.mkfifo(targetpath) 2218 else: 2219 raise ExtractError("fifo not supported by system") 2220 2221 def makedev(self, tarinfo, targetpath): 2222 """Make a character or block device called targetpath. 2223 """ 2224 if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 2225 raise ExtractError("special devices not supported by system") 2226 2227 mode = tarinfo.mode 2228 if tarinfo.isblk(): 2229 mode |= stat.S_IFBLK 2230 else: 2231 mode |= stat.S_IFCHR 2232 2233 os.mknod(targetpath, mode, 2234 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 2235 2236 def makelink(self, tarinfo, targetpath): 2237 """Make a (symbolic) link called targetpath. If it cannot be created 2238 (platform limitation), we try to make a copy of the referenced file 2239 instead of a link. 2240 """ 2241 if hasattr(os, "symlink") and hasattr(os, "link"): 2242 # For systems that support symbolic and hard links. 2243 if tarinfo.issym(): 2244 if os.path.lexists(targetpath): 2245 os.unlink(targetpath) 2246 os.symlink(tarinfo.linkname, targetpath) 2247 else: 2248 # See extract(). 2249 if os.path.exists(tarinfo._link_target): 2250 if os.path.lexists(targetpath): 2251 os.unlink(targetpath) 2252 os.link(tarinfo._link_target, targetpath) 2253 else: 2254 self._extract_member(self._find_link_target(tarinfo), targetpath) 2255 else: 2256 try: 2257 self._extract_member(self._find_link_target(tarinfo), targetpath) 2258 except KeyError: 2259 raise ExtractError("unable to resolve link inside archive") 2260 2261 def chown(self, tarinfo, targetpath): 2262 """Set owner of targetpath according to tarinfo. 2263 """ 2264 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: 2265 # We have to be root to do so. 2266 try: 2267 g = grp.getgrnam(tarinfo.gname)[2] 2268 except KeyError: 2269 g = tarinfo.gid 2270 try: 2271 u = pwd.getpwnam(tarinfo.uname)[2] 2272 except KeyError: 2273 u = tarinfo.uid 2274 try: 2275 if tarinfo.issym() and hasattr(os, "lchown"): 2276 os.lchown(targetpath, u, g) 2277 else: 2278 if sys.platform != "os2emx": 2279 os.chown(targetpath, u, g) 2280 except EnvironmentError, e: 2281 raise ExtractError("could not change owner") 2282 2283 def chmod(self, tarinfo, targetpath): 2284 """Set file permissions of targetpath according to tarinfo. 2285 """ 2286 if hasattr(os, 'chmod'): 2287 try: 2288 os.chmod(targetpath, tarinfo.mode) 2289 except EnvironmentError, e: 2290 raise ExtractError("could not change mode") 2291 2292 def utime(self, tarinfo, targetpath): 2293 """Set modification time of targetpath according to tarinfo. 2294 """ 2295 if not hasattr(os, 'utime'): 2296 return 2297 try: 2298 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) 2299 except EnvironmentError, e: 2300 raise ExtractError("could not change modification time") 2301 2302 #-------------------------------------------------------------------------- 2303 def next(self): 2304 """Return the next member of the archive as a TarInfo object, when 2305 TarFile is opened for reading. Return None if there is no more 2306 available. 2307 """ 2308 self._check("ra") 2309 if self.firstmember is not None: 2310 m = self.firstmember 2311 self.firstmember = None 2312 return m 2313 2314 # Read the next block. 2315 self.fileobj.seek(self.offset) 2316 tarinfo = None 2317 while True: 2318 try: 2319 tarinfo = self.tarinfo.fromtarfile(self) 2320 except EOFHeaderError, e: 2321 if self.ignore_zeros: 2322 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2323 self.offset += BLOCKSIZE 2324 continue 2325 except InvalidHeaderError, e: 2326 if self.ignore_zeros: 2327 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2328 self.offset += BLOCKSIZE 2329 continue 2330 elif self.offset == 0: 2331 raise ReadError(str(e)) 2332 except EmptyHeaderError: 2333 if self.offset == 0: 2334 raise ReadError("empty file") 2335 except TruncatedHeaderError, e: 2336 if self.offset == 0: 2337 raise ReadError(str(e)) 2338 except SubsequentHeaderError, e: 2339 raise ReadError(str(e)) 2340 break 2341 2342 if tarinfo is not None: 2343 self.members.append(tarinfo) 2344 else: 2345 self._loaded = True 2346 2347 return tarinfo 2348 2349 #-------------------------------------------------------------------------- 2350 # Little helper methods: 2351 2352 def _getmember(self, name, tarinfo=None, normalize=False): 2353 """Find an archive member by name from bottom to top. 2354 If tarinfo is given, it is used as the starting point. 2355 """ 2356 # Ensure that all members have been loaded. 2357 members = self.getmembers() 2358 2359 # Limit the member search list up to tarinfo. 2360 if tarinfo is not None: 2361 members = members[:members.index(tarinfo)] 2362 2363 if normalize: 2364 name = os.path.normpath(name) 2365 2366 for member in reversed(members): 2367 if normalize: 2368 member_name = os.path.normpath(member.name) 2369 else: 2370 member_name = member.name 2371 2372 if name == member_name: 2373 return member 2374 2375 def _load(self): 2376 """Read through the entire archive file and look for readable 2377 members. 2378 """ 2379 while True: 2380 tarinfo = self.next() 2381 if tarinfo is None: 2382 break 2383 self._loaded = True 2384 2385 def _check(self, mode=None): 2386 """Check if TarFile is still open, and if the operation's mode 2387 corresponds to TarFile's mode. 2388 """ 2389 if self.closed: 2390 raise IOError("%s is closed" % self.__class__.__name__) 2391 if mode is not None and self.mode not in mode: 2392 raise IOError("bad operation for mode %r" % self.mode) 2393 2394 def _find_link_target(self, tarinfo): 2395 """Find the target member of a symlink or hardlink member in the 2396 archive. 2397 """ 2398 if tarinfo.issym(): 2399 # Always search the entire archive. 2400 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 2401 limit = None 2402 else: 2403 # Search the archive before the link, because a hard link is 2404 # just a reference to an already archived file. 2405 linkname = tarinfo.linkname 2406 limit = tarinfo 2407 2408 member = self._getmember(linkname, tarinfo=limit, normalize=True) 2409 if member is None: 2410 raise KeyError("linkname %r not found" % linkname) 2411 return member 2412 2413 def __iter__(self): 2414 """Provide an iterator object. 2415 """ 2416 if self._loaded: 2417 return iter(self.members) 2418 else: 2419 return TarIter(self) 2420 2421 def _dbg(self, level, msg): 2422 """Write debugging output to sys.stderr. 2423 """ 2424 if level <= self.debug: 2425 print >> sys.stderr, msg 2426 2427 def __enter__(self): 2428 self._check() 2429 return self 2430 2431 def __exit__(self, type, value, traceback): 2432 if type is None: 2433 self.close() 2434 else: 2435 # An exception occurred. We must not call close() because 2436 # it would try to write end-of-archive blocks and padding. 2437 if not self._extfileobj: 2438 self.fileobj.close() 2439 self.closed = True 2440 # class TarFile 2441 2442 class TarIter: 2443 """Iterator Class. 2444 2445 for tarinfo in TarFile(...): 2446 suite... 2447 """ 2448 2449 def __init__(self, tarfile): 2450 """Construct a TarIter object. 2451 """ 2452 self.tarfile = tarfile 2453 self.index = 0 2454 def __iter__(self): 2455 """Return iterator object. 2456 """ 2457 return self 2458 def next(self): 2459 """Return the next item using TarFile's next() method. 2460 When all members have been read, set TarFile as _loaded. 2461 """ 2462 # Fix for SF #1100429: Under rare circumstances it can 2463 # happen that getmembers() is called during iteration, 2464 # which will cause TarIter to stop prematurely. 2465 2466 if self.index == 0 and self.tarfile.firstmember is not None: 2467 tarinfo = self.tarfile.next() 2468 elif self.index < len(self.tarfile.members): 2469 tarinfo = self.tarfile.members[self.index] 2470 elif not self.tarfile._loaded: 2471 tarinfo = self.tarfile.next() 2472 if not tarinfo: 2473 self.tarfile._loaded = True 2474 raise StopIteration 2475 else: 2476 raise StopIteration 2477 self.index += 1 2478 return tarinfo 2479 2480 # Helper classes for sparse file support 2481 class _section: 2482 """Base class for _data and _hole. 2483 """ 2484 def __init__(self, offset, size): 2485 self.offset = offset 2486 self.size = size 2487 def __contains__(self, offset): 2488 return self.offset <= offset < self.offset + self.size 2489 2490 class _data(_section): 2491 """Represent a data section in a sparse file. 2492 """ 2493 def __init__(self, offset, size, realpos): 2494 _section.__init__(self, offset, size) 2495 self.realpos = realpos 2496 2497 class _hole(_section): 2498 """Represent a hole section in a sparse file. 2499 """ 2500 pass 2501 2502 class _ringbuffer(list): 2503 """Ringbuffer class which increases performance 2504 over a regular list. 2505 """ 2506 def __init__(self): 2507 self.idx = 0 2508 def find(self, offset): 2509 idx = self.idx 2510 while True: 2511 item = self[idx] 2512 if offset in item: 2513 break 2514 idx += 1 2515 if idx == len(self): 2516 idx = 0 2517 if idx == self.idx: 2518 # End of File 2519 return None 2520 self.idx = idx 2521 return item 2522 2523 #--------------------------------------------- 2524 # zipfile compatible TarFile class 2525 #--------------------------------------------- 2526 TAR_PLAIN = 0 # zipfile.ZIP_STORED 2527 TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED 2528 class TarFileCompat: 2529 """TarFile class compatible with standard module zipfile's 2530 ZipFile class. 2531 """ 2532 def __init__(self, file, mode="r", compression=TAR_PLAIN): 2533 from warnings import warnpy3k 2534 warnpy3k("the TarFileCompat class has been removed in Python 3.0", 2535 stacklevel=2) 2536 if compression == TAR_PLAIN: 2537 self.tarfile = TarFile.taropen(file, mode) 2538 elif compression == TAR_GZIPPED: 2539 self.tarfile = TarFile.gzopen(file, mode) 2540 else: 2541 raise ValueError("unknown compression constant") 2542 if mode[0:1] == "r": 2543 members = self.tarfile.getmembers() 2544 for m in members: 2545 m.filename = m.name 2546 m.file_size = m.size 2547 m.date_time = time.gmtime(m.mtime)[:6] 2548 def namelist(self): 2549 return map(lambda m: m.name, self.infolist()) 2550 def infolist(self): 2551 return filter(lambda m: m.type in REGULAR_TYPES, 2552 self.tarfile.getmembers()) 2553 def printdir(self): 2554 self.tarfile.list() 2555 def testzip(self): 2556 return 2557 def getinfo(self, name): 2558 return self.tarfile.getmember(name) 2559 def read(self, name): 2560 return self.tarfile.extractfile(self.tarfile.getmember(name)).read() 2561 def write(self, filename, arcname=None, compress_type=None): 2562 self.tarfile.add(filename, arcname) 2563 def writestr(self, zinfo, bytes): 2564 try: 2565 from cStringIO import StringIO 2566 except ImportError: 2567 from StringIO import StringIO 2568 import calendar 2569 tinfo = TarInfo(zinfo.filename) 2570 tinfo.size = len(bytes) 2571 tinfo.mtime = calendar.timegm(zinfo.date_time) 2572 self.tarfile.addfile(tinfo, StringIO(bytes)) 2573 def close(self): 2574 self.tarfile.close() 2575 #class TarFileCompat 2576 2577 #-------------------- 2578 # exported functions 2579 #-------------------- 2580 def is_tarfile(name): 2581 """Return True if name points to a tar archive that we 2582 are able to handle, else return False. 2583 """ 2584 try: 2585 t = open(name) 2586 t.close() 2587 return True 2588 except TarError: 2589 return False 2590 2591 bltn_open = open 2592 open = TarFile.open 2593