Home | History | Annotate | Download | only in tar
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package tar implements access to tar archives.
      6 // It aims to cover most of the variations, including those produced
      7 // by GNU and BSD tars.
      8 //
      9 // References:
     10 //   http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
     11 //   http://www.gnu.org/software/tar/manual/html_node/Standard.html
     12 //   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
     13 package tar
     14 
     15 import (
     16 	"bytes"
     17 	"errors"
     18 	"fmt"
     19 	"os"
     20 	"path"
     21 	"time"
     22 )
     23 
const (
	// blockSize is the length in bytes of zeroBlock, the package-level
	// all-zero block declared further down in this file.
	blockSize = 512

	// Types
	//
	// These are the values stored in Header.Typeflag to identify the kind
	// of entry a header describes.
	TypeReg           = '0'    // regular file
	TypeRegA          = '\x00' // regular file (NUL byte; also the zero value of Header.Typeflag)
	TypeLink          = '1'    // hard link
	TypeSymlink       = '2'    // symbolic link
	TypeChar          = '3'    // character device node
	TypeBlock         = '4'    // block device node
	TypeDir           = '5'    // directory
	TypeFifo          = '6'    // fifo node
	TypeCont          = '7'    // reserved
	TypeXHeader       = 'x'    // extended header
	TypeXGlobalHeader = 'g'    // global extended header
	TypeGNULongName   = 'L'    // Next file has a long name
	TypeGNULongLink   = 'K'    // Next file symlinks to a file w/ a long name
	TypeGNUSparse     = 'S'    // sparse file
)
     43 
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
//
// Mode holds the permission bits together with the c_IS* mode constants,
// and Typeflag holds one of the Type* constants.
type Header struct {
	Name       string    // name of header file entry
	Mode       int64     // permission and mode bits
	Uid        int       // user id of owner
	Gid        int       // group id of owner
	Size       int64     // length in bytes
	ModTime    time.Time // modified time
	Typeflag   byte      // type of header entry
	Linkname   string    // target name of link
	Uname      string    // user name of owner
	Gname      string    // group name of owner
	Devmajor   int64     // major number of character or block device
	Devminor   int64     // minor number of character or block device
	AccessTime time.Time // access time
	ChangeTime time.Time // status change time
	Xattrs     map[string]string // extended attributes (presumably attribute name -> value; TODO confirm against reader/writer)
}
     63 
// File name constants from the tar spec.
// Both limits are expressed in bytes, not characters.
const (
	fileNameSize       = 100 // Maximum number of bytes in a standard tar name.
	fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
)
     69 
     70 // FileInfo returns an os.FileInfo for the Header.
     71 func (h *Header) FileInfo() os.FileInfo {
     72 	return headerFileInfo{h}
     73 }
     74 
     75 // headerFileInfo implements os.FileInfo.
     76 type headerFileInfo struct {
     77 	h *Header
     78 }
     79 
     80 func (fi headerFileInfo) Size() int64        { return fi.h.Size }
     81 func (fi headerFileInfo) IsDir() bool        { return fi.Mode().IsDir() }
     82 func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
     83 func (fi headerFileInfo) Sys() interface{}   { return fi.h }
     84 
     85 // Name returns the base name of the file.
     86 func (fi headerFileInfo) Name() string {
     87 	if fi.IsDir() {
     88 		return path.Base(path.Clean(fi.h.Name))
     89 	}
     90 	return path.Base(fi.h.Name)
     91 }
     92 
     93 // Mode returns the permission and mode bits for the headerFileInfo.
     94 func (fi headerFileInfo) Mode() (mode os.FileMode) {
     95 	// Set file permission bits.
     96 	mode = os.FileMode(fi.h.Mode).Perm()
     97 
     98 	// Set setuid, setgid and sticky bits.
     99 	if fi.h.Mode&c_ISUID != 0 {
    100 		// setuid
    101 		mode |= os.ModeSetuid
    102 	}
    103 	if fi.h.Mode&c_ISGID != 0 {
    104 		// setgid
    105 		mode |= os.ModeSetgid
    106 	}
    107 	if fi.h.Mode&c_ISVTX != 0 {
    108 		// sticky
    109 		mode |= os.ModeSticky
    110 	}
    111 
    112 	// Set file mode bits.
    113 	// clear perm, setuid, setgid and sticky bits.
    114 	m := os.FileMode(fi.h.Mode) &^ 07777
    115 	if m == c_ISDIR {
    116 		// directory
    117 		mode |= os.ModeDir
    118 	}
    119 	if m == c_ISFIFO {
    120 		// named pipe (FIFO)
    121 		mode |= os.ModeNamedPipe
    122 	}
    123 	if m == c_ISLNK {
    124 		// symbolic link
    125 		mode |= os.ModeSymlink
    126 	}
    127 	if m == c_ISBLK {
    128 		// device file
    129 		mode |= os.ModeDevice
    130 	}
    131 	if m == c_ISCHR {
    132 		// Unix character device
    133 		mode |= os.ModeDevice
    134 		mode |= os.ModeCharDevice
    135 	}
    136 	if m == c_ISSOCK {
    137 		// Unix domain socket
    138 		mode |= os.ModeSocket
    139 	}
    140 
    141 	switch fi.h.Typeflag {
    142 	case TypeSymlink:
    143 		// symbolic link
    144 		mode |= os.ModeSymlink
    145 	case TypeChar:
    146 		// character device node
    147 		mode |= os.ModeDevice
    148 		mode |= os.ModeCharDevice
    149 	case TypeBlock:
    150 		// block device node
    151 		mode |= os.ModeDevice
    152 	case TypeDir:
    153 		// directory
    154 		mode |= os.ModeDir
    155 	case TypeFifo:
    156 		// fifo node
    157 		mode |= os.ModeNamedPipe
    158 	}
    159 
    160 	return mode
    161 }
    162 
// sysStat, if non-nil, populates h from system-dependent fields of fi.
// It has no value assigned in this declaration. FileInfoHeader calls it as
// its final step, after the portable fields have been filled in, and
// returns whatever error it reports.
var sysStat func(fi os.FileInfo, h *Header) error
    165 
// Mode constants from the tar spec.
//
// These are matched against Header.Mode. c_ISUID, c_ISGID and c_ISVTX are
// single bits that headerFileInfo.Mode tests with a mask. The remaining
// values identify the file type: FileInfoHeader ORs the matching one into
// Header.Mode, and headerFileInfo.Mode compares Header.Mode against them for
// equality after clearing the low twelve bits (07777).
const (
	c_ISUID  = 04000   // Set uid
	c_ISGID  = 02000   // Set gid
	c_ISVTX  = 01000   // Save text (sticky bit)
	c_ISDIR  = 040000  // Directory
	c_ISFIFO = 010000  // FIFO
	c_ISREG  = 0100000 // Regular file
	c_ISLNK  = 0120000 // Symbolic link
	c_ISBLK  = 060000  // Block special file
	c_ISCHR  = 020000  // Character special file
	c_ISSOCK = 0140000 // Socket
)
    179 
// Keywords for the PAX Extended Header.
//
// paxXattr ends in a dot and so looks like a key prefix rather than a
// complete keyword (presumably followed by the attribute name; TODO confirm
// against the reader and writer). paxNone is the empty string.
const (
	paxAtime    = "atime"
	paxCharset  = "charset"
	paxComment  = "comment"
	paxCtime    = "ctime" // please note that ctime is not a valid pax header.
	paxGid      = "gid"
	paxGname    = "gname"
	paxLinkpath = "linkpath"
	paxMtime    = "mtime"
	paxPath     = "path"
	paxSize     = "size"
	paxUid      = "uid"
	paxUname    = "uname"
	paxXattr    = "SCHILY.xattr."
	paxNone     = ""
)
    197 
    198 // FileInfoHeader creates a partially-populated Header from fi.
    199 // If fi describes a symlink, FileInfoHeader records link as the link target.
    200 // If fi describes a directory, a slash is appended to the name.
    201 // Because os.FileInfo's Name method returns only the base name of
    202 // the file it describes, it may be necessary to modify the Name field
    203 // of the returned header to provide the full path name of the file.
    204 func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
    205 	if fi == nil {
    206 		return nil, errors.New("tar: FileInfo is nil")
    207 	}
    208 	fm := fi.Mode()
    209 	h := &Header{
    210 		Name:    fi.Name(),
    211 		ModTime: fi.ModTime(),
    212 		Mode:    int64(fm.Perm()), // or'd with c_IS* constants later
    213 	}
    214 	switch {
    215 	case fm.IsRegular():
    216 		h.Mode |= c_ISREG
    217 		h.Typeflag = TypeReg
    218 		h.Size = fi.Size()
    219 	case fi.IsDir():
    220 		h.Typeflag = TypeDir
    221 		h.Mode |= c_ISDIR
    222 		h.Name += "/"
    223 	case fm&os.ModeSymlink != 0:
    224 		h.Typeflag = TypeSymlink
    225 		h.Mode |= c_ISLNK
    226 		h.Linkname = link
    227 	case fm&os.ModeDevice != 0:
    228 		if fm&os.ModeCharDevice != 0 {
    229 			h.Mode |= c_ISCHR
    230 			h.Typeflag = TypeChar
    231 		} else {
    232 			h.Mode |= c_ISBLK
    233 			h.Typeflag = TypeBlock
    234 		}
    235 	case fm&os.ModeNamedPipe != 0:
    236 		h.Typeflag = TypeFifo
    237 		h.Mode |= c_ISFIFO
    238 	case fm&os.ModeSocket != 0:
    239 		h.Mode |= c_ISSOCK
    240 	default:
    241 		return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
    242 	}
    243 	if fm&os.ModeSetuid != 0 {
    244 		h.Mode |= c_ISUID
    245 	}
    246 	if fm&os.ModeSetgid != 0 {
    247 		h.Mode |= c_ISGID
    248 	}
    249 	if fm&os.ModeSticky != 0 {
    250 		h.Mode |= c_ISVTX
    251 	}
    252 	// If possible, populate additional fields from OS-specific
    253 	// FileInfo fields.
    254 	if sys, ok := fi.Sys().(*Header); ok {
    255 		// This FileInfo came from a Header (not the OS). Use the
    256 		// original Header to populate all remaining fields.
    257 		h.Uid = sys.Uid
    258 		h.Gid = sys.Gid
    259 		h.Uname = sys.Uname
    260 		h.Gname = sys.Gname
    261 		h.AccessTime = sys.AccessTime
    262 		h.ChangeTime = sys.ChangeTime
    263 		if sys.Xattrs != nil {
    264 			h.Xattrs = make(map[string]string)
    265 			for k, v := range sys.Xattrs {
    266 				h.Xattrs[k] = v
    267 			}
    268 		}
    269 		if sys.Typeflag == TypeLink {
    270 			// hard link
    271 			h.Typeflag = TypeLink
    272 			h.Size = 0
    273 			h.Linkname = sys.Linkname
    274 		}
    275 	}
    276 	if sysStat != nil {
    277 		return h, sysStat(fi, h)
    278 	}
    279 	return h, nil
    280 }
    281 
// zeroBlock is a slice of blockSize zero bytes, allocated once when the
// package is initialized.
// NOTE(review): presumably meant to be shared read-only; confirm that no
// caller writes to it.
var zeroBlock = make([]byte, blockSize)
    283 
    284 // POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
    285 // We compute and return both.
    286 func checksum(header []byte) (unsigned int64, signed int64) {
    287 	for i := 0; i < len(header); i++ {
    288 		if i == 148 {
    289 			// The chksum field (header[148:156]) is special: it should be treated as space bytes.
    290 			unsigned += ' ' * 8
    291 			signed += ' ' * 8
    292 			i += 7
    293 			continue
    294 		}
    295 		unsigned += int64(header[i])
    296 		signed += int64(int8(header[i]))
    297 	}
    298 	return
    299 }
    300 
    301 type slicer []byte
    302 
    303 func (sp *slicer) next(n int) (b []byte) {
    304 	s := *sp
    305 	b, *sp = s[0:n], s[n:]
    306 	return
    307 }
    308 
    309 func isASCII(s string) bool {
    310 	for _, c := range s {
    311 		if c >= 0x80 {
    312 			return false
    313 		}
    314 	}
    315 	return true
    316 }
    317 
    318 func toASCII(s string) string {
    319 	if isASCII(s) {
    320 		return s
    321 	}
    322 	var buf bytes.Buffer
    323 	for _, c := range s {
    324 		if c < 0x80 {
    325 			buf.WriteByte(byte(c))
    326 		}
    327 	}
    328 	return buf.String()
    329 }
    330