Home | History | Annotate | Download | only in tar
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package tar
      6 
      7 // TODO(dsymonds):
      8 //   - pax extensions
      9 
     10 import (
     11 	"bytes"
     12 	"errors"
     13 	"io"
     14 	"io/ioutil"
     15 	"os"
     16 	"strconv"
     17 	"strings"
     18 	"time"
     19 )
     20 
var (
	// ErrHeader is the error reported by the Reader when a tar header
	// (including PAX and GNU sparse metadata) cannot be parsed or fails
	// validation.
	ErrHeader = errors.New("archive/tar: invalid tar header")
)
     24 
// maxNanoSecondIntSize is the number of decimal digits kept from the
// fractional part of a PAX timestamp; parsePAXTime pads or truncates the
// fraction to exactly this many digits so it can be read as nanoseconds.
const maxNanoSecondIntSize = 9
     26 
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r       io.Reader       // underlying archive stream
	err     error           // last error recorded by the reader; Next returns it while it is set
	pad     int64           // amount of padding (ignored) after current file entry
	curr    numBytesReader  // reader for current file entry
	hdrBuff [blockSize]byte // buffer to use in readHeader
}
     38 
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
// The Reader uses that count in skipUnread to discard whatever part of the
// current entry the caller did not consume.
type numBytesReader interface {
	io.Reader
	numBytes() int64
}
     45 
// A regFileReader is a numBytesReader for reading file data from a tar archive.
// It hands out at most nb bytes from r and then reports io.EOF.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}
     51 
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
// Its Read method returns the expanded file: zero bytes for the holes between
// sparse map entries and bytes taken from rfr for the data fragments.
type sparseFileReader struct {
	rfr *regFileReader // reads the sparse-encoded file data
	sp  []sparseEntry  // the sparse map for the file; entries are dropped as they are consumed
	pos int64          // keeps track of file position
	tot int64          // total size of the file
}
     59 
// Keywords for GNU sparse files in a PAX extended header.
// They are looked up in the record map produced by parsePAX to detect the
// sparse format in use and to recover the file's name, size and sparse map.
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
     72 
// Layout constants for old GNU sparse headers: byte offsets into the header
// block, the number of sparse entries each kind of block holds, and the
// width in bytes of each numeric field. They are used by readOldGNUSparseMap.
const (
	oldGNUSparseMainHeaderOffset               = 386 // start of the sparse entries in the main header
	oldGNUSparseMainHeaderIsExtendedOffset     = 482 // non-zero byte here means extension headers follow
	oldGNUSparseMainHeaderNumEntries           = 4   // sparse entries stored in the main header
	oldGNUSparseExtendedHeaderIsExtendedOffset = 504 // non-zero byte here means another extension header follows
	oldGNUSparseExtendedHeaderNumEntries       = 21  // sparse entries stored in each extension header
	oldGNUSparseOffsetSize                     = 12  // width of an entry's offset field
	oldGNUSparseNumBytesSize                   = 12  // width of an entry's numBytes field
)
     83 
     84 // NewReader creates a new Reader reading from r.
     85 func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
     86 
     87 // Next advances to the next entry in the tar archive.
     88 //
     89 // io.EOF is returned at the end of the input.
     90 func (tr *Reader) Next() (*Header, error) {
     91 	var hdr *Header
     92 	if tr.err == nil {
     93 		tr.skipUnread()
     94 	}
     95 	if tr.err != nil {
     96 		return hdr, tr.err
     97 	}
     98 	hdr = tr.readHeader()
     99 	if hdr == nil {
    100 		return hdr, tr.err
    101 	}
    102 	// Check for PAX/GNU header.
    103 	switch hdr.Typeflag {
    104 	case TypeXHeader:
    105 		//  PAX extended header
    106 		headers, err := parsePAX(tr)
    107 		if err != nil {
    108 			return nil, err
    109 		}
    110 		// We actually read the whole file,
    111 		// but this skips alignment padding
    112 		tr.skipUnread()
    113 		if tr.err != nil {
    114 			return nil, tr.err
    115 		}
    116 		hdr = tr.readHeader()
    117 		if hdr == nil {
    118 			return nil, tr.err
    119 		}
    120 		mergePAX(hdr, headers)
    121 
    122 		// Check for a PAX format sparse file
    123 		sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
    124 		if err != nil {
    125 			tr.err = err
    126 			return nil, err
    127 		}
    128 		if sp != nil {
    129 			// Current file is a PAX format GNU sparse file.
    130 			// Set the current file reader to a sparse file reader.
    131 			tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
    132 		}
    133 		return hdr, nil
    134 	case TypeGNULongName:
    135 		// We have a GNU long name header. Its contents are the real file name.
    136 		realname, err := ioutil.ReadAll(tr)
    137 		if err != nil {
    138 			return nil, err
    139 		}
    140 		hdr, err := tr.Next()
    141 		hdr.Name = cString(realname)
    142 		return hdr, err
    143 	case TypeGNULongLink:
    144 		// We have a GNU long link header.
    145 		realname, err := ioutil.ReadAll(tr)
    146 		if err != nil {
    147 			return nil, err
    148 		}
    149 		hdr, err := tr.Next()
    150 		hdr.Linkname = cString(realname)
    151 		return hdr, err
    152 	}
    153 	return hdr, tr.err
    154 }
    155 
    156 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
    157 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
    158 // be treated as a regular file.
    159 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
    160 	var sparseFormat string
    161 
    162 	// Check for sparse format indicators
    163 	major, majorOk := headers[paxGNUSparseMajor]
    164 	minor, minorOk := headers[paxGNUSparseMinor]
    165 	sparseName, sparseNameOk := headers[paxGNUSparseName]
    166 	_, sparseMapOk := headers[paxGNUSparseMap]
    167 	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
    168 	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
    169 
    170 	// Identify which, if any, sparse format applies from which PAX headers are set
    171 	if majorOk && minorOk {
    172 		sparseFormat = major + "." + minor
    173 	} else if sparseNameOk && sparseMapOk {
    174 		sparseFormat = "0.1"
    175 	} else if sparseSizeOk {
    176 		sparseFormat = "0.0"
    177 	} else {
    178 		// Not a PAX format GNU sparse file.
    179 		return nil, nil
    180 	}
    181 
    182 	// Check for unknown sparse format
    183 	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
    184 		return nil, nil
    185 	}
    186 
    187 	// Update hdr from GNU sparse PAX headers
    188 	if sparseNameOk {
    189 		hdr.Name = sparseName
    190 	}
    191 	if sparseSizeOk {
    192 		realSize, err := strconv.ParseInt(sparseSize, 10, 0)
    193 		if err != nil {
    194 			return nil, ErrHeader
    195 		}
    196 		hdr.Size = realSize
    197 	} else if sparseRealSizeOk {
    198 		realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
    199 		if err != nil {
    200 			return nil, ErrHeader
    201 		}
    202 		hdr.Size = realSize
    203 	}
    204 
    205 	// Set up the sparse map, according to the particular sparse format in use
    206 	var sp []sparseEntry
    207 	var err error
    208 	switch sparseFormat {
    209 	case "0.0", "0.1":
    210 		sp, err = readGNUSparseMap0x1(headers)
    211 	case "1.0":
    212 		sp, err = readGNUSparseMap1x0(tr.curr)
    213 	}
    214 	return sp, err
    215 }
    216 
    217 // mergePAX merges well known headers according to PAX standard.
    218 // In general headers with the same name as those found
    219 // in the header struct overwrite those found in the header
    220 // struct with higher precision or longer values. Esp. useful
    221 // for name and linkname fields.
    222 func mergePAX(hdr *Header, headers map[string]string) error {
    223 	for k, v := range headers {
    224 		switch k {
    225 		case paxPath:
    226 			hdr.Name = v
    227 		case paxLinkpath:
    228 			hdr.Linkname = v
    229 		case paxGname:
    230 			hdr.Gname = v
    231 		case paxUname:
    232 			hdr.Uname = v
    233 		case paxUid:
    234 			uid, err := strconv.ParseInt(v, 10, 0)
    235 			if err != nil {
    236 				return err
    237 			}
    238 			hdr.Uid = int(uid)
    239 		case paxGid:
    240 			gid, err := strconv.ParseInt(v, 10, 0)
    241 			if err != nil {
    242 				return err
    243 			}
    244 			hdr.Gid = int(gid)
    245 		case paxAtime:
    246 			t, err := parsePAXTime(v)
    247 			if err != nil {
    248 				return err
    249 			}
    250 			hdr.AccessTime = t
    251 		case paxMtime:
    252 			t, err := parsePAXTime(v)
    253 			if err != nil {
    254 				return err
    255 			}
    256 			hdr.ModTime = t
    257 		case paxCtime:
    258 			t, err := parsePAXTime(v)
    259 			if err != nil {
    260 				return err
    261 			}
    262 			hdr.ChangeTime = t
    263 		case paxSize:
    264 			size, err := strconv.ParseInt(v, 10, 0)
    265 			if err != nil {
    266 				return err
    267 			}
    268 			hdr.Size = int64(size)
    269 		default:
    270 			if strings.HasPrefix(k, paxXattr) {
    271 				if hdr.Xattrs == nil {
    272 					hdr.Xattrs = make(map[string]string)
    273 				}
    274 				hdr.Xattrs[k[len(paxXattr):]] = v
    275 			}
    276 		}
    277 	}
    278 	return nil
    279 }
    280 
    281 // parsePAXTime takes a string of the form %d.%d as described in
    282 // the PAX specification.
    283 func parsePAXTime(t string) (time.Time, error) {
    284 	buf := []byte(t)
    285 	pos := bytes.IndexByte(buf, '.')
    286 	var seconds, nanoseconds int64
    287 	var err error
    288 	if pos == -1 {
    289 		seconds, err = strconv.ParseInt(t, 10, 0)
    290 		if err != nil {
    291 			return time.Time{}, err
    292 		}
    293 	} else {
    294 		seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
    295 		if err != nil {
    296 			return time.Time{}, err
    297 		}
    298 		nano_buf := string(buf[pos+1:])
    299 		// Pad as needed before converting to a decimal.
    300 		// For example .030 -> .030000000 -> 30000000 nanoseconds
    301 		if len(nano_buf) < maxNanoSecondIntSize {
    302 			// Right pad
    303 			nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
    304 		} else if len(nano_buf) > maxNanoSecondIntSize {
    305 			// Right truncate
    306 			nano_buf = nano_buf[:maxNanoSecondIntSize]
    307 		}
    308 		nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
    309 		if err != nil {
    310 			return time.Time{}, err
    311 		}
    312 	}
    313 	ts := time.Unix(seconds, nanoseconds)
    314 	return ts, nil
    315 }
    316 
    317 // parsePAX parses PAX headers.
    318 // If an extended header (type 'x') is invalid, ErrHeader is returned
    319 func parsePAX(r io.Reader) (map[string]string, error) {
    320 	buf, err := ioutil.ReadAll(r)
    321 	if err != nil {
    322 		return nil, err
    323 	}
    324 
    325 	// For GNU PAX sparse format 0.0 support.
    326 	// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
    327 	var sparseMap bytes.Buffer
    328 
    329 	headers := make(map[string]string)
    330 	// Each record is constructed as
    331 	//     "%d %s=%s\n", length, keyword, value
    332 	for len(buf) > 0 {
    333 		// or the header was empty to start with.
    334 		var sp int
    335 		// The size field ends at the first space.
    336 		sp = bytes.IndexByte(buf, ' ')
    337 		if sp == -1 {
    338 			return nil, ErrHeader
    339 		}
    340 		// Parse the first token as a decimal integer.
    341 		n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
    342 		if err != nil || n < 5 || int64(len(buf)) < n {
    343 			return nil, ErrHeader
    344 		}
    345 		// Extract everything between the decimal and the n -1 on the
    346 		// beginning to eat the ' ', -1 on the end to skip the newline.
    347 		var record []byte
    348 		record, buf = buf[sp+1:n-1], buf[n:]
    349 		// The first equals is guaranteed to mark the end of the key.
    350 		// Everything else is value.
    351 		eq := bytes.IndexByte(record, '=')
    352 		if eq == -1 {
    353 			return nil, ErrHeader
    354 		}
    355 		key, value := record[:eq], record[eq+1:]
    356 
    357 		keyStr := string(key)
    358 		if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
    359 			// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
    360 			sparseMap.Write(value)
    361 			sparseMap.Write([]byte{','})
    362 		} else {
    363 			// Normal key. Set the value in the headers map.
    364 			headers[keyStr] = string(value)
    365 		}
    366 	}
    367 	if sparseMap.Len() != 0 {
    368 		// Add sparse info to headers, chopping off the extra comma
    369 		sparseMap.Truncate(sparseMap.Len() - 1)
    370 		headers[paxGNUSparseMap] = sparseMap.String()
    371 	}
    372 	return headers, nil
    373 }
    374 
    375 // cString parses bytes as a NUL-terminated C-style string.
    376 // If a NUL byte is not found then the whole slice is returned as a string.
    377 func cString(b []byte) string {
    378 	n := 0
    379 	for n < len(b) && b[n] != 0 {
    380 		n++
    381 	}
    382 	return string(b[0:n])
    383 }
    384 
    385 func (tr *Reader) octal(b []byte) int64 {
    386 	// Check for binary format first.
    387 	if len(b) > 0 && b[0]&0x80 != 0 {
    388 		var x int64
    389 		for i, c := range b {
    390 			if i == 0 {
    391 				c &= 0x7f // ignore signal bit in first byte
    392 			}
    393 			x = x<<8 | int64(c)
    394 		}
    395 		return x
    396 	}
    397 
    398 	// Because unused fields are filled with NULs, we need
    399 	// to skip leading NULs. Fields may also be padded with
    400 	// spaces or NULs.
    401 	// So we remove leading and trailing NULs and spaces to
    402 	// be sure.
    403 	b = bytes.Trim(b, " \x00")
    404 
    405 	if len(b) == 0 {
    406 		return 0
    407 	}
    408 	x, err := strconv.ParseUint(cString(b), 8, 64)
    409 	if err != nil {
    410 		tr.err = err
    411 	}
    412 	return int64(x)
    413 }
    414 
    415 // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
    416 func (tr *Reader) skipUnread() {
    417 	nr := tr.numBytes() + tr.pad // number of bytes to skip
    418 	tr.curr, tr.pad = nil, 0
    419 	if sr, ok := tr.r.(io.Seeker); ok {
    420 		if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
    421 			return
    422 		}
    423 	}
    424 	_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
    425 }
    426 
    427 func (tr *Reader) verifyChecksum(header []byte) bool {
    428 	if tr.err != nil {
    429 		return false
    430 	}
    431 
    432 	given := tr.octal(header[148:156])
    433 	unsigned, signed := checksum(header)
    434 	return given == unsigned || given == signed
    435 }
    436 
// readHeader reads the next header block from the underlying reader and
// decodes it into a new Header.
//
// On success it also prepares the reader for the entry's data: tr.curr is
// set to a reader limited to the entry's size (a sparse reader for old GNU
// sparse entries) and tr.pad to the padding that follows the data.
//
// It returns nil when no header could be produced, with the reason in
// tr.err: io.EOF after two consecutive zero blocks, ErrHeader for a bad
// checksum, a malformed numeric field, a negative size or a lone zero block,
// or the error from the underlying read.
func (tr *Reader) readHeader() *Header {
	header := tr.hdrBuff[:]
	// Clear the reused buffer before reading into it.
	copy(header, zeroBlock)

	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
		return nil
	}

	// Two blocks of zero bytes marks the end of the archive.
	if bytes.Equal(header, zeroBlock[0:blockSize]) {
		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
			return nil
		}
		if bytes.Equal(header, zeroBlock[0:blockSize]) {
			tr.err = io.EOF
		} else {
			tr.err = ErrHeader // zero block and then non-zero block
		}
		return nil
	}

	if !tr.verifyChecksum(header) {
		tr.err = ErrHeader
		return nil
	}

	// Unpack
	// The fields are consumed in order through s; each next call takes the
	// given number of bytes.
	hdr := new(Header)
	s := slicer(header)

	hdr.Name = cString(s.next(100))
	hdr.Mode = tr.octal(s.next(8))
	hdr.Uid = int(tr.octal(s.next(8)))
	hdr.Gid = int(tr.octal(s.next(8)))
	hdr.Size = tr.octal(s.next(12))
	if hdr.Size < 0 {
		tr.err = ErrHeader
		return nil
	}
	hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
	s.next(8) // chksum
	hdr.Typeflag = s.next(1)[0]
	hdr.Linkname = cString(s.next(100))

	// The remainder of the header depends on the value of magic.
	// The original (v7) version of tar had no explicit magic field,
	// so its magic bytes, like the rest of the block, are NULs.
	magic := string(s.next(8)) // contains version field as well.
	var format string
	switch {
	case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
		// A "tar\x00" trailer in the last four bytes of the block selects
		// the star layout.
		if string(header[508:512]) == "tar\x00" {
			format = "star"
		} else {
			format = "posix"
		}
	case magic == "ustar  \x00": // old GNU tar
		format = "gnu"
	}

	// format stays empty for any other magic, in which case none of the
	// extended fields below are decoded.
	switch format {
	case "posix", "gnu", "star":
		hdr.Uname = cString(s.next(32))
		hdr.Gname = cString(s.next(32))
		devmajor := s.next(8)
		devminor := s.next(8)
		// Device numbers are only decoded for character and block devices.
		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
			hdr.Devmajor = tr.octal(devmajor)
			hdr.Devminor = tr.octal(devminor)
		}
		var prefix string
		switch format {
		case "posix", "gnu":
			prefix = cString(s.next(155))
		case "star":
			// star uses a shorter prefix followed by access and change times.
			prefix = cString(s.next(131))
			hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
			hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
		}
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}

	// octal records parse failures in tr.err; report any of them uniformly
	// as an invalid header.
	if tr.err != nil {
		tr.err = ErrHeader
		return nil
	}

	// Maximum value of hdr.Size is 64 GB (12 octal digits),
	// so there's no risk of int64 overflowing.
	nb := int64(hdr.Size)
	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two

	// Set the current file reader.
	tr.curr = &regFileReader{r: tr.r, nb: nb}

	// Check for old GNU sparse format entry.
	if hdr.Typeflag == TypeGNUSparse {
		// Get the real size of the file.
		// nb above keeps the size of the data stored in the archive, while
		// hdr.Size now becomes the size of the expanded file.
		hdr.Size = tr.octal(header[483:495])

		// Read the sparse map.
		sp := tr.readOldGNUSparseMap(header)
		if tr.err != nil {
			return nil
		}
		// Current file is a GNU sparse file. Update the current file reader.
		tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
	}

	return hdr
}
    550 
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
	offset   int64 // position of the data block within the expanded file
	numBytes int64 // length of the data block in bytes
}
    558 
    559 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
    560 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
    561 // then one or more extension headers are used to store the rest of the sparse map.
    562 func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
    563 	isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
    564 	spCap := oldGNUSparseMainHeaderNumEntries
    565 	if isExtended {
    566 		spCap += oldGNUSparseExtendedHeaderNumEntries
    567 	}
    568 	sp := make([]sparseEntry, 0, spCap)
    569 	s := slicer(header[oldGNUSparseMainHeaderOffset:])
    570 
    571 	// Read the four entries from the main tar header
    572 	for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
    573 		offset := tr.octal(s.next(oldGNUSparseOffsetSize))
    574 		numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
    575 		if tr.err != nil {
    576 			tr.err = ErrHeader
    577 			return nil
    578 		}
    579 		if offset == 0 && numBytes == 0 {
    580 			break
    581 		}
    582 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    583 	}
    584 
    585 	for isExtended {
    586 		// There are more entries. Read an extension header and parse its entries.
    587 		sparseHeader := make([]byte, blockSize)
    588 		if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
    589 			return nil
    590 		}
    591 		isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
    592 		s = slicer(sparseHeader)
    593 		for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
    594 			offset := tr.octal(s.next(oldGNUSparseOffsetSize))
    595 			numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
    596 			if tr.err != nil {
    597 				tr.err = ErrHeader
    598 				return nil
    599 			}
    600 			if offset == 0 && numBytes == 0 {
    601 				break
    602 			}
    603 			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    604 		}
    605 	}
    606 	return sp
    607 }
    608 
    609 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
    610 // The sparse map is stored just before the file data and padded out to the nearest block boundary.
    611 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
    612 	buf := make([]byte, 2*blockSize)
    613 	sparseHeader := buf[:blockSize]
    614 
    615 	// readDecimal is a helper function to read a decimal integer from the sparse map
    616 	// while making sure to read from the file in blocks of size blockSize
    617 	readDecimal := func() (int64, error) {
    618 		// Look for newline
    619 		nl := bytes.IndexByte(sparseHeader, '\n')
    620 		if nl == -1 {
    621 			if len(sparseHeader) >= blockSize {
    622 				// This is an error
    623 				return 0, ErrHeader
    624 			}
    625 			oldLen := len(sparseHeader)
    626 			newLen := oldLen + blockSize
    627 			if cap(sparseHeader) < newLen {
    628 				// There's more header, but we need to make room for the next block
    629 				copy(buf, sparseHeader)
    630 				sparseHeader = buf[:newLen]
    631 			} else {
    632 				// There's more header, and we can just reslice
    633 				sparseHeader = sparseHeader[:newLen]
    634 			}
    635 
    636 			// Now that sparseHeader is large enough, read next block
    637 			if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
    638 				return 0, err
    639 			}
    640 
    641 			// Look for a newline in the new data
    642 			nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
    643 			if nl == -1 {
    644 				// This is an error
    645 				return 0, ErrHeader
    646 			}
    647 			nl += oldLen // We want the position from the beginning
    648 		}
    649 		// Now that we've found a newline, read a number
    650 		n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
    651 		if err != nil {
    652 			return 0, ErrHeader
    653 		}
    654 
    655 		// Update sparseHeader to consume this number
    656 		sparseHeader = sparseHeader[nl+1:]
    657 		return n, nil
    658 	}
    659 
    660 	// Read the first block
    661 	if _, err := io.ReadFull(r, sparseHeader); err != nil {
    662 		return nil, err
    663 	}
    664 
    665 	// The first line contains the number of entries
    666 	numEntries, err := readDecimal()
    667 	if err != nil {
    668 		return nil, err
    669 	}
    670 
    671 	// Read all the entries
    672 	sp := make([]sparseEntry, 0, numEntries)
    673 	for i := int64(0); i < numEntries; i++ {
    674 		// Read the offset
    675 		offset, err := readDecimal()
    676 		if err != nil {
    677 			return nil, err
    678 		}
    679 		// Read numBytes
    680 		numBytes, err := readDecimal()
    681 		if err != nil {
    682 			return nil, err
    683 		}
    684 
    685 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    686 	}
    687 
    688 	return sp, nil
    689 }
    690 
    691 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
    692 // The sparse map is stored in the PAX headers.
    693 func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
    694 	// Get number of entries
    695 	numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
    696 	if !ok {
    697 		return nil, ErrHeader
    698 	}
    699 	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
    700 	if err != nil {
    701 		return nil, ErrHeader
    702 	}
    703 
    704 	sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
    705 
    706 	// There should be two numbers in sparseMap for each entry
    707 	if int64(len(sparseMap)) != 2*numEntries {
    708 		return nil, ErrHeader
    709 	}
    710 
    711 	// Loop through the entries in the sparse map
    712 	sp := make([]sparseEntry, 0, numEntries)
    713 	for i := int64(0); i < numEntries; i++ {
    714 		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
    715 		if err != nil {
    716 			return nil, ErrHeader
    717 		}
    718 		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
    719 		if err != nil {
    720 			return nil, ErrHeader
    721 		}
    722 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    723 	}
    724 
    725 	return sp, nil
    726 }
    727 
    728 // numBytes returns the number of bytes left to read in the current file's entry
    729 // in the tar archive, or 0 if there is no current file.
    730 func (tr *Reader) numBytes() int64 {
    731 	if tr.curr == nil {
    732 		// No current file, so no bytes
    733 		return 0
    734 	}
    735 	return tr.curr.numBytes()
    736 }
    737 
    738 // Read reads from the current entry in the tar archive.
    739 // It returns 0, io.EOF when it reaches the end of that entry,
    740 // until Next is called to advance to the next entry.
    741 func (tr *Reader) Read(b []byte) (n int, err error) {
    742 	if tr.curr == nil {
    743 		return 0, io.EOF
    744 	}
    745 	n, err = tr.curr.Read(b)
    746 	if err != nil && err != io.EOF {
    747 		tr.err = err
    748 	}
    749 	return
    750 }
    751 
    752 func (rfr *regFileReader) Read(b []byte) (n int, err error) {
    753 	if rfr.nb == 0 {
    754 		// file consumed
    755 		return 0, io.EOF
    756 	}
    757 	if int64(len(b)) > rfr.nb {
    758 		b = b[0:rfr.nb]
    759 	}
    760 	n, err = rfr.r.Read(b)
    761 	rfr.nb -= int64(n)
    762 
    763 	if err == io.EOF && rfr.nb > 0 {
    764 		err = io.ErrUnexpectedEOF
    765 	}
    766 	return
    767 }
    768 
    769 // numBytes returns the number of bytes left to read in the file's data in the tar archive.
    770 func (rfr *regFileReader) numBytes() int64 {
    771 	return rfr.nb
    772 }
    773 
    774 // readHole reads a sparse file hole ending at offset toOffset
    775 func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
    776 	n64 := toOffset - sfr.pos
    777 	if n64 > int64(len(b)) {
    778 		n64 = int64(len(b))
    779 	}
    780 	n := int(n64)
    781 	for i := 0; i < n; i++ {
    782 		b[i] = 0
    783 	}
    784 	sfr.pos += n64
    785 	return n
    786 }
    787 
// Read reads the sparse file data in expanded form.
//
// Each call produces bytes from exactly one region: zeros when the current
// position lies in a hole, or data from the underlying regFileReader when it
// lies in a data fragment, so a call may return fewer bytes than len(b).
// It returns 0, io.EOF once the position has reached the total file size and
// no fragments remain, and 0, io.ErrUnexpectedEOF when the next fragment
// starts beyond the total file size.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
	if len(sfr.sp) == 0 {
		// No more data fragments to read from.
		if sfr.pos < sfr.tot {
			// We're in the last hole
			n = sfr.readHole(b, sfr.tot)
			return
		}
		// Otherwise, we're at the end of the file
		return 0, io.EOF
	}
	// The next fragment claims to start past the end of the file.
	if sfr.tot < sfr.sp[0].offset {
		return 0, io.ErrUnexpectedEOF
	}
	if sfr.pos < sfr.sp[0].offset {
		// We're in a hole
		n = sfr.readHole(b, sfr.sp[0].offset)
		return
	}

	// We're not in a hole, so we'll read from the next data fragment
	posInFragment := sfr.pos - sfr.sp[0].offset
	bytesLeft := sfr.sp[0].numBytes - posInFragment
	// Never read past the end of the current fragment.
	if int64(len(b)) > bytesLeft {
		b = b[0:bytesLeft]
	}

	n, err = sfr.rfr.Read(b)
	sfr.pos += int64(n)

	if int64(n) == bytesLeft {
		// We're done with this fragment
		sfr.sp = sfr.sp[1:]
	}

	if err == io.EOF && sfr.pos < sfr.tot {
		// We reached the end of the last fragment's data, but there's a final hole
		err = nil
	}
	return
}
    830 
    831 // numBytes returns the number of bytes left to read in the sparse file's
    832 // sparse-encoded data in the tar archive.
    833 func (sfr *sparseFileReader) numBytes() int64 {
    834 	return sfr.rfr.nb
    835 }
    836