Home | History | Annotate | Download | only in tar
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package tar
      6 
      7 // TODO(dsymonds):
      8 //   - pax extensions
      9 
     10 import (
     11 	"bytes"
     12 	"errors"
     13 	"io"
     14 	"io/ioutil"
     15 	"math"
     16 	"strconv"
     17 	"strings"
     18 	"time"
     19 )
     20 
var (
	// ErrHeader is the error returned by this package when a tar header
	// (or one of its extensions) cannot be parsed or fails validation.
	ErrHeader = errors.New("archive/tar: invalid tar header")
)
     24 
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r    io.Reader      // underlying stream the archive is read from
	pad  int64          // amount of padding (ignored) after current file entry
	curr numBytesReader // reader for current file entry; nil when there is no current entry
	blk  block          // buffer to use as temporary local storage (holds the last header block read)

	// err is a persistent error.
	// It is only the responsibility of every exported method of Reader to
	// ensure that this error is sticky: Next and Read return it immediately
	// once it is set, and the unexported helpers never consult it.
	err error
}
     40 
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
//
// Reader.skipUnread relies on numBytes to know how much of the current
// entry's data section is still unread in the archive stream.
type numBytesReader interface {
	io.Reader
	numBytes() int64
}
     47 
// A regFileReader is a numBytesReader for reading file data from a tar archive.
// It reads from r but never hands out more than nb bytes in total.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}
     53 
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive. Reads return the expanded file: data fragments come from rfr,
// and every region not covered by a fragment is returned as zero bytes.
type sparseFileReader struct {
	rfr   numBytesReader // Reads the sparse-encoded file data
	sp    []sparseEntry  // The sparse map for the file; fragments are popped from the front once consumed
	pos   int64          // Keeps track of file position (in the expanded file)
	total int64          // Total size of the file (the expanded, "real" size)
}
     62 
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2,  numBytes: 5}, // Data fragment for bytes 2..6
//		{offset: 18, numBytes: 3}, // Data fragment for bytes 18..20
//	}
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset   int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
}
     85 
// Keywords for GNU sparse files in a PAX extended header.
//
// parsePAX collects the repeated offset/numbytes records into a single
// comma-separated value stored under paxGNUSparseMap.
// checkForGNUSparsePAXHeaders inspects the major, minor, name, map, size and
// realsize keys to decide which sparse format (if any) applies, and
// readGNUSparseMap0x1 reads the entry count from the numblocks key.
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
     98 
     99 // NewReader creates a new Reader reading from r.
    100 func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
    101 
    102 // Next advances to the next entry in the tar archive.
    103 //
    104 // io.EOF is returned at the end of the input.
    105 func (tr *Reader) Next() (*Header, error) {
    106 	if tr.err != nil {
    107 		return nil, tr.err
    108 	}
    109 	hdr, err := tr.next()
    110 	tr.err = err
    111 	return hdr, err
    112 }
    113 
    114 func (tr *Reader) next() (*Header, error) {
    115 	var extHdrs map[string]string
    116 
    117 	// Externally, Next iterates through the tar archive as if it is a series of
    118 	// files. Internally, the tar format often uses fake "files" to add meta
    119 	// data that describes the next file. These meta data "files" should not
    120 	// normally be visible to the outside. As such, this loop iterates through
    121 	// one or more "header files" until it finds a "normal file".
    122 loop:
    123 	for {
    124 		if err := tr.skipUnread(); err != nil {
    125 			return nil, err
    126 		}
    127 		hdr, rawHdr, err := tr.readHeader()
    128 		if err != nil {
    129 			return nil, err
    130 		}
    131 		if err := tr.handleRegularFile(hdr); err != nil {
    132 			return nil, err
    133 		}
    134 
    135 		// Check for PAX/GNU special headers and files.
    136 		switch hdr.Typeflag {
    137 		case TypeXHeader:
    138 			extHdrs, err = parsePAX(tr)
    139 			if err != nil {
    140 				return nil, err
    141 			}
    142 			continue loop // This is a meta header affecting the next header
    143 		case TypeGNULongName, TypeGNULongLink:
    144 			realname, err := ioutil.ReadAll(tr)
    145 			if err != nil {
    146 				return nil, err
    147 			}
    148 
    149 			// Convert GNU extensions to use PAX headers.
    150 			if extHdrs == nil {
    151 				extHdrs = make(map[string]string)
    152 			}
    153 			var p parser
    154 			switch hdr.Typeflag {
    155 			case TypeGNULongName:
    156 				extHdrs[paxPath] = p.parseString(realname)
    157 			case TypeGNULongLink:
    158 				extHdrs[paxLinkpath] = p.parseString(realname)
    159 			}
    160 			if p.err != nil {
    161 				return nil, p.err
    162 			}
    163 			continue loop // This is a meta header affecting the next header
    164 		default:
    165 			// The old GNU sparse format is handled here since it is technically
    166 			// just a regular file with additional attributes.
    167 
    168 			if err := mergePAX(hdr, extHdrs); err != nil {
    169 				return nil, err
    170 			}
    171 
    172 			// The extended headers may have updated the size.
    173 			// Thus, setup the regFileReader again after merging PAX headers.
    174 			if err := tr.handleRegularFile(hdr); err != nil {
    175 				return nil, err
    176 			}
    177 
    178 			// Sparse formats rely on being able to read from the logical data
    179 			// section; there must be a preceding call to handleRegularFile.
    180 			if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil {
    181 				return nil, err
    182 			}
    183 			return hdr, nil // This is a file, so stop
    184 		}
    185 	}
    186 }
    187 
    188 // handleRegularFile sets up the current file reader and padding such that it
    189 // can only read the following logical data section. It will properly handle
    190 // special headers that contain no data section.
    191 func (tr *Reader) handleRegularFile(hdr *Header) error {
    192 	nb := hdr.Size
    193 	if isHeaderOnlyType(hdr.Typeflag) {
    194 		nb = 0
    195 	}
    196 	if nb < 0 {
    197 		return ErrHeader
    198 	}
    199 
    200 	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
    201 	tr.curr = &regFileReader{r: tr.r, nb: nb}
    202 	return nil
    203 }
    204 
    205 // handleSparseFile checks if the current file is a sparse format of any type
    206 // and sets the curr reader appropriately.
    207 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error {
    208 	var sp []sparseEntry
    209 	var err error
    210 	if hdr.Typeflag == TypeGNUSparse {
    211 		sp, err = tr.readOldGNUSparseMap(hdr, rawHdr)
    212 		if err != nil {
    213 			return err
    214 		}
    215 	} else {
    216 		sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
    217 		if err != nil {
    218 			return err
    219 		}
    220 	}
    221 
    222 	// If sp is non-nil, then this is a sparse file.
    223 	// Note that it is possible for len(sp) to be zero.
    224 	if sp != nil {
    225 		tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size)
    226 	}
    227 	return err
    228 }
    229 
    230 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
    231 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
    232 // be treated as a regular file.
    233 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
    234 	var sparseFormat string
    235 
    236 	// Check for sparse format indicators
    237 	major, majorOk := headers[paxGNUSparseMajor]
    238 	minor, minorOk := headers[paxGNUSparseMinor]
    239 	sparseName, sparseNameOk := headers[paxGNUSparseName]
    240 	_, sparseMapOk := headers[paxGNUSparseMap]
    241 	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
    242 	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
    243 
    244 	// Identify which, if any, sparse format applies from which PAX headers are set
    245 	if majorOk && minorOk {
    246 		sparseFormat = major + "." + minor
    247 	} else if sparseNameOk && sparseMapOk {
    248 		sparseFormat = "0.1"
    249 	} else if sparseSizeOk {
    250 		sparseFormat = "0.0"
    251 	} else {
    252 		// Not a PAX format GNU sparse file.
    253 		return nil, nil
    254 	}
    255 
    256 	// Check for unknown sparse format
    257 	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
    258 		return nil, nil
    259 	}
    260 
    261 	// Update hdr from GNU sparse PAX headers
    262 	if sparseNameOk {
    263 		hdr.Name = sparseName
    264 	}
    265 	if sparseSizeOk {
    266 		realSize, err := strconv.ParseInt(sparseSize, 10, 64)
    267 		if err != nil {
    268 			return nil, ErrHeader
    269 		}
    270 		hdr.Size = realSize
    271 	} else if sparseRealSizeOk {
    272 		realSize, err := strconv.ParseInt(sparseRealSize, 10, 64)
    273 		if err != nil {
    274 			return nil, ErrHeader
    275 		}
    276 		hdr.Size = realSize
    277 	}
    278 
    279 	// Set up the sparse map, according to the particular sparse format in use
    280 	var sp []sparseEntry
    281 	var err error
    282 	switch sparseFormat {
    283 	case "0.0", "0.1":
    284 		sp, err = readGNUSparseMap0x1(headers)
    285 	case "1.0":
    286 		sp, err = readGNUSparseMap1x0(tr.curr)
    287 	}
    288 	return sp, err
    289 }
    290 
    291 // mergePAX merges well known headers according to PAX standard.
    292 // In general headers with the same name as those found
    293 // in the header struct overwrite those found in the header
    294 // struct with higher precision or longer values. Esp. useful
    295 // for name and linkname fields.
    296 func mergePAX(hdr *Header, headers map[string]string) (err error) {
    297 	var id64 int64
    298 	for k, v := range headers {
    299 		switch k {
    300 		case paxPath:
    301 			hdr.Name = v
    302 		case paxLinkpath:
    303 			hdr.Linkname = v
    304 		case paxUname:
    305 			hdr.Uname = v
    306 		case paxGname:
    307 			hdr.Gname = v
    308 		case paxUid:
    309 			id64, err = strconv.ParseInt(v, 10, 64)
    310 			hdr.Uid = int(id64) // Integer overflow possible
    311 		case paxGid:
    312 			id64, err = strconv.ParseInt(v, 10, 64)
    313 			hdr.Gid = int(id64) // Integer overflow possible
    314 		case paxAtime:
    315 			hdr.AccessTime, err = parsePAXTime(v)
    316 		case paxMtime:
    317 			hdr.ModTime, err = parsePAXTime(v)
    318 		case paxCtime:
    319 			hdr.ChangeTime, err = parsePAXTime(v)
    320 		case paxSize:
    321 			hdr.Size, err = strconv.ParseInt(v, 10, 64)
    322 		default:
    323 			if strings.HasPrefix(k, paxXattr) {
    324 				if hdr.Xattrs == nil {
    325 					hdr.Xattrs = make(map[string]string)
    326 				}
    327 				hdr.Xattrs[k[len(paxXattr):]] = v
    328 			}
    329 		}
    330 		if err != nil {
    331 			return ErrHeader
    332 		}
    333 	}
    334 	return nil
    335 }
    336 
    337 // parsePAX parses PAX headers.
    338 // If an extended header (type 'x') is invalid, ErrHeader is returned
    339 func parsePAX(r io.Reader) (map[string]string, error) {
    340 	buf, err := ioutil.ReadAll(r)
    341 	if err != nil {
    342 		return nil, err
    343 	}
    344 	sbuf := string(buf)
    345 
    346 	// For GNU PAX sparse format 0.0 support.
    347 	// This function transforms the sparse format 0.0 headers into format 0.1
    348 	// headers since 0.0 headers were not PAX compliant.
    349 	var sparseMap []string
    350 
    351 	extHdrs := make(map[string]string)
    352 	for len(sbuf) > 0 {
    353 		key, value, residual, err := parsePAXRecord(sbuf)
    354 		if err != nil {
    355 			return nil, ErrHeader
    356 		}
    357 		sbuf = residual
    358 
    359 		switch key {
    360 		case paxGNUSparseOffset, paxGNUSparseNumBytes:
    361 			// Validate sparse header order and value.
    362 			if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
    363 				(len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
    364 				strings.Contains(value, ",") {
    365 				return nil, ErrHeader
    366 			}
    367 			sparseMap = append(sparseMap, value)
    368 		default:
    369 			// According to PAX specification, a value is stored only if it is
    370 			// non-empty. Otherwise, the key is deleted.
    371 			if len(value) > 0 {
    372 				extHdrs[key] = value
    373 			} else {
    374 				delete(extHdrs, key)
    375 			}
    376 		}
    377 	}
    378 	if len(sparseMap) > 0 {
    379 		extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
    380 	}
    381 	return extHdrs, nil
    382 }
    383 
    384 // skipUnread skips any unread bytes in the existing file entry, as well as any
    385 // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
    386 // encountered in the data portion; it is okay to hit io.EOF in the padding.
    387 //
    388 // Note that this function still works properly even when sparse files are being
    389 // used since numBytes returns the bytes remaining in the underlying io.Reader.
    390 func (tr *Reader) skipUnread() error {
    391 	dataSkip := tr.numBytes()      // Number of data bytes to skip
    392 	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
    393 	tr.curr, tr.pad = nil, 0
    394 
    395 	// If possible, Seek to the last byte before the end of the data section.
    396 	// Do this because Seek is often lazy about reporting errors; this will mask
    397 	// the fact that the tar stream may be truncated. We can rely on the
    398 	// io.CopyN done shortly afterwards to trigger any IO errors.
    399 	var seekSkipped int64 // Number of bytes skipped via Seek
    400 	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
    401 		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
    402 		// io.Seeker, but calling Seek always returns an error and performs
    403 		// no action. Thus, we try an innocent seek to the current position
    404 		// to see if Seek is really supported.
    405 		pos1, err := sr.Seek(0, io.SeekCurrent)
    406 		if err == nil {
    407 			// Seek seems supported, so perform the real Seek.
    408 			pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent)
    409 			if err != nil {
    410 				return err
    411 			}
    412 			seekSkipped = pos2 - pos1
    413 		}
    414 	}
    415 
    416 	copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
    417 	if err == io.EOF && seekSkipped+copySkipped < dataSkip {
    418 		err = io.ErrUnexpectedEOF
    419 	}
    420 	return err
    421 }
    422 
    423 // readHeader reads the next block header and assumes that the underlying reader
    424 // is already aligned to a block boundary. It returns the raw block of the
    425 // header in case further processing is required.
    426 //
    427 // The err will be set to io.EOF only when one of the following occurs:
    428 //	* Exactly 0 bytes are read and EOF is hit.
    429 //	* Exactly 1 block of zeros is read and EOF is hit.
    430 //	* At least 2 blocks of zeros are read.
    431 func (tr *Reader) readHeader() (*Header, *block, error) {
    432 	// Two blocks of zero bytes marks the end of the archive.
    433 	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
    434 		return nil, nil, err // EOF is okay here; exactly 0 bytes read
    435 	}
    436 	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
    437 		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
    438 			return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
    439 		}
    440 		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
    441 			return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
    442 		}
    443 		return nil, nil, ErrHeader // Zero block and then non-zero block
    444 	}
    445 
    446 	// Verify the header matches a known format.
    447 	format := tr.blk.GetFormat()
    448 	if format == formatUnknown {
    449 		return nil, nil, ErrHeader
    450 	}
    451 
    452 	var p parser
    453 	hdr := new(Header)
    454 
    455 	// Unpack the V7 header.
    456 	v7 := tr.blk.V7()
    457 	hdr.Name = p.parseString(v7.Name())
    458 	hdr.Mode = p.parseNumeric(v7.Mode())
    459 	hdr.Uid = int(p.parseNumeric(v7.UID()))
    460 	hdr.Gid = int(p.parseNumeric(v7.GID()))
    461 	hdr.Size = p.parseNumeric(v7.Size())
    462 	hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
    463 	hdr.Typeflag = v7.TypeFlag()[0]
    464 	hdr.Linkname = p.parseString(v7.LinkName())
    465 
    466 	// The atime and ctime fields are often left unused. Some versions of Go
    467 	// had a bug in the tar.Writer where it would output an invalid tar file
    468 	// in certain rare situations because the logic incorrectly believed that
    469 	// the old GNU format had a prefix field. This is wrong and leads to
    470 	// an outputted file that actually mangles the atime and ctime fields.
    471 	//
    472 	// In order to continue reading tar files created by a buggy writer, we
    473 	// try to parse the atime and ctime fields, but just return the zero value
    474 	// of time.Time when we cannot parse them.
    475 	//
    476 	// See https://golang.org/issues/12594
    477 	tryParseTime := func(b []byte) time.Time {
    478 		var p parser
    479 		n := p.parseNumeric(b)
    480 		if b[0] != 0x00 && p.err == nil {
    481 			return time.Unix(n, 0)
    482 		}
    483 		return time.Time{}
    484 	}
    485 
    486 	// Unpack format specific fields.
    487 	if format > formatV7 {
    488 		ustar := tr.blk.USTAR()
    489 		hdr.Uname = p.parseString(ustar.UserName())
    490 		hdr.Gname = p.parseString(ustar.GroupName())
    491 		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
    492 			hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
    493 			hdr.Devminor = p.parseNumeric(ustar.DevMinor())
    494 		}
    495 
    496 		var prefix string
    497 		switch format {
    498 		case formatUSTAR:
    499 			ustar := tr.blk.USTAR()
    500 			prefix = p.parseString(ustar.Prefix())
    501 		case formatSTAR:
    502 			star := tr.blk.STAR()
    503 			prefix = p.parseString(star.Prefix())
    504 			hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
    505 			hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
    506 		case formatGNU:
    507 			gnu := tr.blk.GNU()
    508 			hdr.AccessTime = tryParseTime(gnu.AccessTime())
    509 			hdr.ChangeTime = tryParseTime(gnu.ChangeTime())
    510 		}
    511 		if len(prefix) > 0 {
    512 			hdr.Name = prefix + "/" + hdr.Name
    513 		}
    514 	}
    515 	return hdr, &tr.blk, p.err
    516 }
    517 
    518 // readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
    519 // The sparse map is stored in the tar header if it's small enough.
    520 // If it's larger than four entries, then one or more extension headers are used
    521 // to store the rest of the sparse map.
    522 //
    523 // The Header.Size does not reflect the size of any extended headers used.
    524 // Thus, this function will read from the raw io.Reader to fetch extra headers.
    525 // This method mutates blk in the process.
    526 func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) {
    527 	// Make sure that the input format is GNU.
    528 	// Unfortunately, the STAR format also has a sparse header format that uses
    529 	// the same type flag but has a completely different layout.
    530 	if blk.GetFormat() != formatGNU {
    531 		return nil, ErrHeader
    532 	}
    533 
    534 	var p parser
    535 	hdr.Size = p.parseNumeric(blk.GNU().RealSize())
    536 	if p.err != nil {
    537 		return nil, p.err
    538 	}
    539 	var s sparseArray = blk.GNU().Sparse()
    540 	var sp = make([]sparseEntry, 0, s.MaxEntries())
    541 	for {
    542 		for i := 0; i < s.MaxEntries(); i++ {
    543 			// This termination condition is identical to GNU and BSD tar.
    544 			if s.Entry(i).Offset()[0] == 0x00 {
    545 				break // Don't return, need to process extended headers (even if empty)
    546 			}
    547 			offset := p.parseNumeric(s.Entry(i).Offset())
    548 			numBytes := p.parseNumeric(s.Entry(i).NumBytes())
    549 			if p.err != nil {
    550 				return nil, p.err
    551 			}
    552 			sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    553 		}
    554 
    555 		if s.IsExtended()[0] > 0 {
    556 			// There are more entries. Read an extension header and parse its entries.
    557 			if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
    558 				if err == io.EOF {
    559 					err = io.ErrUnexpectedEOF
    560 				}
    561 				return nil, err
    562 			}
    563 			s = blk.Sparse()
    564 			continue
    565 		}
    566 		return sp, nil // Done
    567 	}
    568 }
    569 
    570 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
    571 // version 1.0. The format of the sparse map consists of a series of
    572 // newline-terminated numeric fields. The first field is the number of entries
    573 // and is always present. Following this are the entries, consisting of two
    574 // fields (offset, numBytes). This function must stop reading at the end
    575 // boundary of the block containing the last newline.
    576 //
    577 // Note that the GNU manual says that numeric values should be encoded in octal
    578 // format. However, the GNU tar utility itself outputs these values in decimal.
    579 // As such, this library treats values as being encoded in decimal.
    580 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
    581 	var cntNewline int64
    582 	var buf bytes.Buffer
    583 	var blk = make([]byte, blockSize)
    584 
    585 	// feedTokens copies data in numBlock chunks from r into buf until there are
    586 	// at least cnt newlines in buf. It will not read more blocks than needed.
    587 	var feedTokens = func(cnt int64) error {
    588 		for cntNewline < cnt {
    589 			if _, err := io.ReadFull(r, blk); err != nil {
    590 				if err == io.EOF {
    591 					err = io.ErrUnexpectedEOF
    592 				}
    593 				return err
    594 			}
    595 			buf.Write(blk)
    596 			for _, c := range blk {
    597 				if c == '\n' {
    598 					cntNewline++
    599 				}
    600 			}
    601 		}
    602 		return nil
    603 	}
    604 
    605 	// nextToken gets the next token delimited by a newline. This assumes that
    606 	// at least one newline exists in the buffer.
    607 	var nextToken = func() string {
    608 		cntNewline--
    609 		tok, _ := buf.ReadString('\n')
    610 		return tok[:len(tok)-1] // Cut off newline
    611 	}
    612 
    613 	// Parse for the number of entries.
    614 	// Use integer overflow resistant math to check this.
    615 	if err := feedTokens(1); err != nil {
    616 		return nil, err
    617 	}
    618 	numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
    619 	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
    620 		return nil, ErrHeader
    621 	}
    622 
    623 	// Parse for all member entries.
    624 	// numEntries is trusted after this since a potential attacker must have
    625 	// committed resources proportional to what this library used.
    626 	if err := feedTokens(2 * numEntries); err != nil {
    627 		return nil, err
    628 	}
    629 	sp := make([]sparseEntry, 0, numEntries)
    630 	for i := int64(0); i < numEntries; i++ {
    631 		offset, err := strconv.ParseInt(nextToken(), 10, 64)
    632 		if err != nil {
    633 			return nil, ErrHeader
    634 		}
    635 		numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
    636 		if err != nil {
    637 			return nil, ErrHeader
    638 		}
    639 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    640 	}
    641 	return sp, nil
    642 }
    643 
    644 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
    645 // version 0.1. The sparse map is stored in the PAX headers.
    646 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
    647 	// Get number of entries.
    648 	// Use integer overflow resistant math to check this.
    649 	numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
    650 	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
    651 	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
    652 		return nil, ErrHeader
    653 	}
    654 
    655 	// There should be two numbers in sparseMap for each entry.
    656 	sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
    657 	if int64(len(sparseMap)) != 2*numEntries {
    658 		return nil, ErrHeader
    659 	}
    660 
    661 	// Loop through the entries in the sparse map.
    662 	// numEntries is trusted now.
    663 	sp := make([]sparseEntry, 0, numEntries)
    664 	for i := int64(0); i < numEntries; i++ {
    665 		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
    666 		if err != nil {
    667 			return nil, ErrHeader
    668 		}
    669 		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
    670 		if err != nil {
    671 			return nil, ErrHeader
    672 		}
    673 		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
    674 	}
    675 	return sp, nil
    676 }
    677 
    678 // numBytes returns the number of bytes left to read in the current file's entry
    679 // in the tar archive, or 0 if there is no current file.
    680 func (tr *Reader) numBytes() int64 {
    681 	if tr.curr == nil {
    682 		// No current file, so no bytes
    683 		return 0
    684 	}
    685 	return tr.curr.numBytes()
    686 }
    687 
    688 // Read reads from the current entry in the tar archive.
    689 // It returns 0, io.EOF when it reaches the end of that entry,
    690 // until Next is called to advance to the next entry.
    691 //
    692 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
    693 // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
    694 // the Header.Size claims.
    695 func (tr *Reader) Read(b []byte) (int, error) {
    696 	if tr.err != nil {
    697 		return 0, tr.err
    698 	}
    699 	if tr.curr == nil {
    700 		return 0, io.EOF
    701 	}
    702 
    703 	n, err := tr.curr.Read(b)
    704 	if err != nil && err != io.EOF {
    705 		tr.err = err
    706 	}
    707 	return n, err
    708 }
    709 
    710 func (rfr *regFileReader) Read(b []byte) (n int, err error) {
    711 	if rfr.nb == 0 {
    712 		// file consumed
    713 		return 0, io.EOF
    714 	}
    715 	if int64(len(b)) > rfr.nb {
    716 		b = b[0:rfr.nb]
    717 	}
    718 	n, err = rfr.r.Read(b)
    719 	rfr.nb -= int64(n)
    720 
    721 	if err == io.EOF && rfr.nb > 0 {
    722 		err = io.ErrUnexpectedEOF
    723 	}
    724 	return
    725 }
    726 
// numBytes returns the number of bytes left to read in the file's data in the tar archive.
// The count starts at the value the reader was created with and is reduced
// by Read as bytes are consumed.
func (rfr *regFileReader) numBytes() int64 {
	return rfr.nb
}
    731 
    732 // newSparseFileReader creates a new sparseFileReader, but validates all of the
    733 // sparse entries before doing so.
    734 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
    735 	if total < 0 {
    736 		return nil, ErrHeader // Total size cannot be negative
    737 	}
    738 
    739 	// Validate all sparse entries. These are the same checks as performed by
    740 	// the BSD tar utility.
    741 	for i, s := range sp {
    742 		switch {
    743 		case s.offset < 0 || s.numBytes < 0:
    744 			return nil, ErrHeader // Negative values are never okay
    745 		case s.offset > math.MaxInt64-s.numBytes:
    746 			return nil, ErrHeader // Integer overflow with large length
    747 		case s.offset+s.numBytes > total:
    748 			return nil, ErrHeader // Region extends beyond the "real" size
    749 		case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
    750 			return nil, ErrHeader // Regions can't overlap and must be in order
    751 		}
    752 	}
    753 	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
    754 }
    755 
    756 // readHole reads a sparse hole ending at endOffset.
    757 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
    758 	n64 := endOffset - sfr.pos
    759 	if n64 > int64(len(b)) {
    760 		n64 = int64(len(b))
    761 	}
    762 	n := int(n64)
    763 	for i := 0; i < n; i++ {
    764 		b[i] = 0
    765 	}
    766 	sfr.pos += n64
    767 	return n
    768 }
    769 
    770 // Read reads the sparse file data in expanded form.
    771 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
    772 	// Skip past all empty fragments.
    773 	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
    774 		sfr.sp = sfr.sp[1:]
    775 	}
    776 
    777 	// If there are no more fragments, then it is possible that there
    778 	// is one last sparse hole.
    779 	if len(sfr.sp) == 0 {
    780 		// This behavior matches the BSD tar utility.
    781 		// However, GNU tar stops returning data even if sfr.total is unmet.
    782 		if sfr.pos < sfr.total {
    783 			return sfr.readHole(b, sfr.total), nil
    784 		}
    785 		return 0, io.EOF
    786 	}
    787 
    788 	// In front of a data fragment, so read a hole.
    789 	if sfr.pos < sfr.sp[0].offset {
    790 		return sfr.readHole(b, sfr.sp[0].offset), nil
    791 	}
    792 
    793 	// In a data fragment, so read from it.
    794 	// This math is overflow free since we verify that offset and numBytes can
    795 	// be safely added when creating the sparseFileReader.
    796 	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
    797 	bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
    798 	if int64(len(b)) > bytesLeft {
    799 		b = b[:bytesLeft]
    800 	}
    801 
    802 	n, err = sfr.rfr.Read(b)
    803 	sfr.pos += int64(n)
    804 	if err == io.EOF {
    805 		if sfr.pos < endPos {
    806 			err = io.ErrUnexpectedEOF // There was supposed to be more data
    807 		} else if sfr.pos < sfr.total {
    808 			err = nil // There is still an implicit sparse hole at the end
    809 		}
    810 	}
    811 
    812 	if sfr.pos == endPos {
    813 		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
    814 	}
    815 	return n, err
    816 }
    817 
// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive. It delegates to the underlying
// reader, so the result counts bytes of the encoded stream and not bytes of
// the expanded file.
func (sfr *sparseFileReader) numBytes() int64 {
	return sfr.rfr.numBytes()
}
    823