Home | History | Annotate | Download | only in tar
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package tar
      6 
      7 // TODO(dsymonds):
      8 // - catch more errors (no first header, etc.)
      9 
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"path"
	"sort"
	"strconv"
	"strings"
	"time"
)
     21 
// Errors reported by the tar Writer.
var (
	// ErrWriteTooLong is returned by Write when the caller supplies more
	// bytes than remain for the current entry.
	ErrWriteTooLong    = errors.New("archive/tar: write too long")
	// ErrFieldTooLong is recorded when a string does not fit into its
	// fixed-size header field and no pax record is used instead.
	ErrFieldTooLong    = errors.New("archive/tar: header field too long")
	// ErrWriteAfterClose is returned by Write and WriteHeader once the
	// Writer has been closed.
	ErrWriteAfterClose = errors.New("archive/tar: write after close")
	// errNameTooLong is returned by splitUSTARLongName when a name cannot
	// be split into ustar prefix and name fields.
	errNameTooLong     = errors.New("archive/tar: name too long")
	// errInvalidHeader is returned by writeHeader when pax records would
	// be required but the caller disallowed them.
	errInvalidHeader   = errors.New("archive/tar: header field too long or contains invalid values")
)
     29 
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
type Writer struct {
	// w is the underlying destination for all header, data and padding bytes.
	w          io.Writer
	// err holds the error state recorded by Flush, Write, WriteHeader and
	// Close; writeHeader refuses to proceed while it is non-nil.
	err        error
	nb         int64 // number of unwritten bytes for current file entry
	pad        int64 // amount of padding to write after current file entry
	// closed is set by Close; afterwards Write and WriteHeader return
	// ErrWriteAfterClose.
	closed     bool
	usedBinary bool            // whether the binary numeric field extension was used
	preferPax  bool            // use pax header instead of binary numeric header
	hdrBuff    [blockSize]byte // buffer to use in writeHeader when writing a regular header
	paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
}
     45 
     46 // NewWriter creates a new Writer writing to w.
     47 func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
     48 
     49 // Flush finishes writing the current file (optional).
     50 func (tw *Writer) Flush() error {
     51 	if tw.nb > 0 {
     52 		tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
     53 		return tw.err
     54 	}
     55 
     56 	n := tw.nb + tw.pad
     57 	for n > 0 && tw.err == nil {
     58 		nr := n
     59 		if nr > blockSize {
     60 			nr = blockSize
     61 		}
     62 		var nw int
     63 		nw, tw.err = tw.w.Write(zeroBlock[0:nr])
     64 		n -= int64(nw)
     65 	}
     66 	tw.nb = 0
     67 	tw.pad = 0
     68 	return tw.err
     69 }
     70 
     71 // Write s into b, terminating it with a NUL if there is room.
     72 // If the value is too long for the field and allowPax is true add a paxheader record instead
     73 func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
     74 	needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
     75 	if needsPaxHeader {
     76 		paxHeaders[paxKeyword] = s
     77 		return
     78 	}
     79 	if len(s) > len(b) {
     80 		if tw.err == nil {
     81 			tw.err = ErrFieldTooLong
     82 		}
     83 		return
     84 	}
     85 	ascii := toASCII(s)
     86 	copy(b, ascii)
     87 	if len(ascii) < len(b) {
     88 		b[len(ascii)] = 0
     89 	}
     90 }
     91 
     92 // Encode x as an octal ASCII string and write it into b with leading zeros.
     93 func (tw *Writer) octal(b []byte, x int64) {
     94 	s := strconv.FormatInt(x, 8)
     95 	// leading zeros, but leave room for a NUL.
     96 	for len(s)+1 < len(b) {
     97 		s = "0" + s
     98 	}
     99 	tw.cString(b, s, false, paxNone, nil)
    100 }
    101 
    102 // Write x into b, either as octal or as binary (GNUtar/star extension).
    103 // If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead
    104 func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
    105 	// Try octal first.
    106 	s := strconv.FormatInt(x, 8)
    107 	if len(s) < len(b) {
    108 		tw.octal(b, x)
    109 		return
    110 	}
    111 
    112 	// If it is too long for octal, and pax is preferred, use a pax header
    113 	if allowPax && tw.preferPax {
    114 		tw.octal(b, 0)
    115 		s := strconv.FormatInt(x, 10)
    116 		paxHeaders[paxKeyword] = s
    117 		return
    118 	}
    119 
    120 	// Too big: use binary (big-endian).
    121 	tw.usedBinary = true
    122 	for i := len(b) - 1; x > 0 && i >= 0; i-- {
    123 		b[i] = byte(x)
    124 		x >>= 8
    125 	}
    126 	b[0] |= 0x80 // highest bit indicates binary format
    127 }
    128 
// minTime and maxTime bound the modification times that writeHeader stores
// in the header; a ModTime outside this range is written as 0.
var (
	// minTime is the Unix epoch.
	minTime = time.Unix(0, 0)
	// There is room for 11 octal digits (33 bits) of mtime.
	maxTime = minTime.Add((1<<33 - 1) * time.Second)
)
    134 
    135 // WriteHeader writes hdr and prepares to accept the file's contents.
    136 // WriteHeader calls Flush if it is not the first header.
    137 // Calling after a Close will return ErrWriteAfterClose.
    138 func (tw *Writer) WriteHeader(hdr *Header) error {
    139 	return tw.writeHeader(hdr, true)
    140 }
    141 
// writeHeader writes hdr and prepares to accept the file's contents.
// It flushes the previous entry first and returns ErrWriteAfterClose if the
// Writer has been closed.
//
// allowPax reports whether a pax extended header may be emitted for values
// that cannot be stored in the regular header. WriteHeader passes true;
// writePAXHeader calls back into this method with false so that writing the
// extended header itself cannot trigger another extended header. With
// allowPax false, needing any pax record yields errInvalidHeader.
//
// On success the Writer expects hdr.Size bytes of data followed by padding
// up to the next block boundary.
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
	if tw.closed {
		return ErrWriteAfterClose
	}
	// Finish the previous entry (padding) unless an error is already pending.
	if tw.err == nil {
		tw.Flush()
	}
	if tw.err != nil {
		return tw.err
	}

	// a map to hold pax header records, if any are needed
	paxHeaders := make(map[string]string)

	// TODO(shanemhansen): we might want to use PAX headers for
	// subsecond time resolution, but for now let's just capture
	// too long fields or non ascii characters

	var header []byte

	// We need to select which scratch buffer to use carefully,
	// since this method is called recursively to write PAX headers.
	// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
	// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
	// already being used by the non-recursive call, so we must use paxHdrBuff.
	header = tw.hdrBuff[:]
	if !allowPax {
		header = tw.paxHdrBuff[:]
	}
	// Start from an all-zero block; s hands out consecutive fields of it.
	copy(header, zeroBlock)
	s := slicer(header)

	// Keep a reference to the file name field so that it can be overwritten
	// later if a ustar long name can be used instead of a pax record.
	pathHeaderBytes := s.next(fileNameSize)

	tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders)

	// Handle out of range ModTime carefully.
	// A ModTime outside [minTime, maxTime] is stored as 0.
	var modTime int64
	if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
		modTime = hdr.ModTime.Unix()
	}

	tw.octal(s.next(8), hdr.Mode)                                   // 100:108
	tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116
	tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124
	tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders)     // 124:136
	tw.numeric(s.next(12), modTime, false, paxNone, nil)            // 136:148 --- consider using pax for finer granularity
	s.next(8)                                                       // chksum (148:156)
	s.next(1)[0] = hdr.Typeflag                                     // 156:157

	tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders)

	copy(s.next(8), []byte("ustar\x0000"))                        // 257:265
	tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297
	tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329
	tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil)      // 329:337
	tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil)      // 337:345

	// Keep a reference to the prefix field so that it can be overwritten
	// later if a ustar long name can be used instead of a pax record.
	prefixHeaderBytes := s.next(155)
	tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500  prefix

	// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
	if tw.usedBinary {
		copy(header[257:265], []byte("ustar  \x00"))
	}

	_, paxPathUsed := paxHeaders[paxPath]
	// try to use a ustar header when only the name is too long
	if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
		suffix := hdr.Name
		prefix := ""
		if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) {
			var err error
			prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
			if err == nil {
				// ok we can use a ustar long name instead of pax, now correct the fields

				// remove the path field from the pax header. this will suppress the pax header
				delete(paxHeaders, paxPath)

				// update the path fields
				tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
				tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)

				// Use the ustar magic if we used ustar long names.
				if len(prefix) > 0 && !tw.usedBinary {
					copy(header[257:265], []byte("ustar\x00"))
				}
			}
		}
	}

	// The chksum field is terminated by a NUL and a space.
	// This is different from the other octal fields.
	chksum, _ := checksum(header)
	tw.octal(header[148:155], chksum)
	header[155] = ' '

	if tw.err != nil {
		// problem with header; probably integer too big for a field.
		return tw.err
	}

	// Extended attributes are always carried as pax records.
	if allowPax {
		for k, v := range hdr.Xattrs {
			paxHeaders[paxXattr+k] = v
		}
	}

	// Emit the extended header entry, if one is needed, before the regular header.
	if len(paxHeaders) > 0 {
		if !allowPax {
			return errInvalidHeader
		}
		if err := tw.writePAXHeader(hdr, paxHeaders); err != nil {
			return err
		}
	}
	// Record how much data and trailing padding the entry still needs.
	tw.nb = int64(hdr.Size)
	tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize

	_, tw.err = tw.w.Write(header)
	return tw.err
}
    272 
    273 // writeUSTARLongName splits a USTAR long name hdr.Name.
    274 // name must be < 256 characters. errNameTooLong is returned
    275 // if hdr.Name can't be split. The splitting heuristic
    276 // is compatible with gnu tar.
    277 func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
    278 	length := len(name)
    279 	if length > fileNamePrefixSize+1 {
    280 		length = fileNamePrefixSize + 1
    281 	} else if name[length-1] == '/' {
    282 		length--
    283 	}
    284 	i := strings.LastIndex(name[:length], "/")
    285 	// nlen contains the resulting length in the name field.
    286 	// plen contains the resulting length in the prefix field.
    287 	nlen := len(name) - i - 1
    288 	plen := i
    289 	if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
    290 		err = errNameTooLong
    291 		return
    292 	}
    293 	prefix, suffix = name[:i], name[i+1:]
    294 	return
    295 }
    296 
    297 // writePaxHeader writes an extended pax header to the
    298 // archive.
    299 func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
    300 	// Prepare extended header
    301 	ext := new(Header)
    302 	ext.Typeflag = TypeXHeader
    303 	// Setting ModTime is required for reader parsing to
    304 	// succeed, and seems harmless enough.
    305 	ext.ModTime = hdr.ModTime
    306 	// The spec asks that we namespace our pseudo files
    307 	// with the current pid.
    308 	pid := os.Getpid()
    309 	dir, file := path.Split(hdr.Name)
    310 	fullName := path.Join(dir,
    311 		fmt.Sprintf("PaxHeaders.%d", pid), file)
    312 
    313 	ascii := toASCII(fullName)
    314 	if len(ascii) > 100 {
    315 		ascii = ascii[:100]
    316 	}
    317 	ext.Name = ascii
    318 	// Construct the body
    319 	var buf bytes.Buffer
    320 
    321 	for k, v := range paxHeaders {
    322 		fmt.Fprint(&buf, paxHeader(k+"="+v))
    323 	}
    324 
    325 	ext.Size = int64(len(buf.Bytes()))
    326 	if err := tw.writeHeader(ext, false); err != nil {
    327 		return err
    328 	}
    329 	if _, err := tw.Write(buf.Bytes()); err != nil {
    330 		return err
    331 	}
    332 	if err := tw.Flush(); err != nil {
    333 		return err
    334 	}
    335 	return nil
    336 }
    337 
    338 // paxHeader formats a single pax record, prefixing it with the appropriate length
    339 func paxHeader(msg string) string {
    340 	const padding = 2 // Extra padding for space and newline
    341 	size := len(msg) + padding
    342 	size += len(strconv.Itoa(size))
    343 	record := fmt.Sprintf("%d %s\n", size, msg)
    344 	if len(record) != size {
    345 		// Final adjustment if adding size increased
    346 		// the number of digits in size
    347 		size = len(record)
    348 		record = fmt.Sprintf("%d %s\n", size, msg)
    349 	}
    350 	return record
    351 }
    352 
    353 // Write writes to the current entry in the tar archive.
    354 // Write returns the error ErrWriteTooLong if more than
    355 // hdr.Size bytes are written after WriteHeader.
    356 func (tw *Writer) Write(b []byte) (n int, err error) {
    357 	if tw.closed {
    358 		err = ErrWriteAfterClose
    359 		return
    360 	}
    361 	overwrite := false
    362 	if int64(len(b)) > tw.nb {
    363 		b = b[0:tw.nb]
    364 		overwrite = true
    365 	}
    366 	n, err = tw.w.Write(b)
    367 	tw.nb -= int64(n)
    368 	if err == nil && overwrite {
    369 		err = ErrWriteTooLong
    370 		return
    371 	}
    372 	tw.err = err
    373 	return
    374 }
    375 
    376 // Close closes the tar archive, flushing any unwritten
    377 // data to the underlying writer.
    378 func (tw *Writer) Close() error {
    379 	if tw.err != nil || tw.closed {
    380 		return tw.err
    381 	}
    382 	tw.Flush()
    383 	tw.closed = true
    384 	if tw.err != nil {
    385 		return tw.err
    386 	}
    387 
    388 	// trailer: two zero blocks
    389 	for i := 0; i < 2; i++ {
    390 		_, tw.err = tw.w.Write(zeroBlock)
    391 		if tw.err != nil {
    392 			break
    393 		}
    394 	}
    395 	return tw.err
    396 }
    397