// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package tar

import "strings"

// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with its own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The table's upper portion shows the Header fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
//
// The Writer currently provides no support for sparse files.
type Format int

// Constants to identify various tar formats.
49 const ( 50 // Deliberately hide the meaning of constants from public API. 51 _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... 52 53 // FormatUnknown indicates that the format is unknown. 54 FormatUnknown 55 56 // The format of the original Unix V7 tar tool prior to standardization. 57 formatV7 58 59 // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. 60 // 61 // While this format is compatible with most tar readers, 62 // the format has several limitations making it unsuitable for some usages. 63 // Most notably, it cannot support sparse files, files larger than 8GiB, 64 // filenames larger than 256 characters, and non-ASCII filenames. 65 // 66 // Reference: 67 // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 68 FormatUSTAR 69 70 // FormatPAX represents the PAX header format defined in POSIX.1-2001. 71 // 72 // PAX extends USTAR by writing a special file with Typeflag TypeXHeader 73 // preceding the original header. This file contains a set of key-value 74 // records, which are used to overcome USTAR's shortcomings, in addition to 75 // providing the ability to have sub-second resolution for timestamps. 76 // 77 // Some newer formats add their own extensions to PAX by defining their 78 // own keys and assigning certain semantic meaning to the associated values. 79 // For example, sparse file support in PAX is implemented using keys 80 // defined by the GNU manual (e.g., "GNU.sparse.map"). 81 // 82 // Reference: 83 // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html 84 FormatPAX 85 86 // FormatGNU represents the GNU header format. 87 // 88 // The GNU header format is older than the USTAR and PAX standards and 89 // is not compatible with them. The GNU format supports 90 // arbitrary file sizes, filenames of arbitrary encoding and length, 91 // sparse files, and other features. 
92 // 93 // It is recommended that PAX be chosen over GNU unless the target 94 // application can only parse GNU formatted archives. 95 // 96 // Reference: 97 // http://www.gnu.org/software/tar/manual/html_node/Standard.html 98 FormatGNU 99 100 // Schily's tar format, which is incompatible with USTAR. 101 // This does not cover STAR extensions to the PAX format; these fall under 102 // the PAX format. 103 formatSTAR 104 105 formatMax 106 ) 107 108 func (f Format) has(f2 Format) bool { return f&f2 != 0 } 109 func (f *Format) mayBe(f2 Format) { *f |= f2 } 110 func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } 111 func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } 112 113 var formatNames = map[Format]string{ 114 formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", 115 } 116 117 func (f Format) String() string { 118 var ss []string 119 for f2 := Format(1); f2 < formatMax; f2 <<= 1 { 120 if f.has(f2) { 121 ss = append(ss, formatNames[f2]) 122 } 123 } 124 switch len(ss) { 125 case 0: 126 return "<unknown>" 127 case 1: 128 return ss[0] 129 default: 130 return "(" + strings.Join(ss, " | ") + ")" 131 } 132 } 133 134 // Magics used to identify various formats. 135 const ( 136 magicGNU, versionGNU = "ustar ", " \x00" 137 magicUSTAR, versionUSTAR = "ustar\x00", "00" 138 trailerSTAR = "tar\x00" 139 ) 140 141 // Size constants from various tar specifications. 142 const ( 143 blockSize = 512 // Size of each block in a tar stream 144 nameSize = 100 // Max length of the name field in USTAR format 145 prefixSize = 155 // Max length of the prefix field in USTAR format 146 ) 147 148 // blockPadding computes the number of bytes needed to pad offset up to the 149 // nearest block edge where 0 <= n < blockSize. 150 func blockPadding(offset int64) (n int64) { 151 return -offset & (blockSize - 1) 152 } 153 154 var zeroBlock block 155 156 type block [blockSize]byte 157 158 // Convert block to any number of formats. 
159 func (b *block) V7() *headerV7 { return (*headerV7)(b) } 160 func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } 161 func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } 162 func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } 163 func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } 164 165 // GetFormat checks that the block is a valid tar header based on the checksum. 166 // It then attempts to guess the specific format based on magic values. 167 // If the checksum fails, then FormatUnknown is returned. 168 func (b *block) GetFormat() Format { 169 // Verify checksum. 170 var p parser 171 value := p.parseOctal(b.V7().Chksum()) 172 chksum1, chksum2 := b.ComputeChecksum() 173 if p.err != nil || (value != chksum1 && value != chksum2) { 174 return FormatUnknown 175 } 176 177 // Guess the magic values. 178 magic := string(b.USTAR().Magic()) 179 version := string(b.USTAR().Version()) 180 trailer := string(b.STAR().Trailer()) 181 switch { 182 case magic == magicUSTAR && trailer == trailerSTAR: 183 return formatSTAR 184 case magic == magicUSTAR: 185 return FormatUSTAR | FormatPAX 186 case magic == magicGNU && version == versionGNU: 187 return FormatGNU 188 default: 189 return formatV7 190 } 191 } 192 193 // SetFormat writes the magic values necessary for specified format 194 // and then updates the checksum accordingly. 195 func (b *block) SetFormat(format Format) { 196 // Set the magic values. 197 switch { 198 case format.has(formatV7): 199 // Do nothing. 200 case format.has(FormatGNU): 201 copy(b.GNU().Magic(), magicGNU) 202 copy(b.GNU().Version(), versionGNU) 203 case format.has(formatSTAR): 204 copy(b.STAR().Magic(), magicUSTAR) 205 copy(b.STAR().Version(), versionUSTAR) 206 copy(b.STAR().Trailer(), trailerSTAR) 207 case format.has(FormatUSTAR | FormatPAX): 208 copy(b.USTAR().Magic(), magicUSTAR) 209 copy(b.USTAR().Version(), versionUSTAR) 210 default: 211 panic("invalid format") 212 } 213 214 // Update checksum. 
215 // This field is special in that it is terminated by a NULL then space. 216 var f formatter 217 field := b.V7().Chksum() 218 chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 219 f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 220 field[7] = ' ' 221 } 222 223 // ComputeChecksum computes the checksum for the header block. 224 // POSIX specifies a sum of the unsigned byte values, but the Sun tar used 225 // signed byte values. 226 // We compute and return both. 227 func (b *block) ComputeChecksum() (unsigned, signed int64) { 228 for i, c := range b { 229 if 148 <= i && i < 156 { 230 c = ' ' // Treat the checksum field itself as all spaces. 231 } 232 unsigned += int64(c) 233 signed += int64(int8(c)) 234 } 235 return unsigned, signed 236 } 237 238 // Reset clears the block with all zeros. 239 func (b *block) Reset() { 240 *b = block{} 241 } 242 243 type headerV7 [blockSize]byte 244 245 func (h *headerV7) Name() []byte { return h[000:][:100] } 246 func (h *headerV7) Mode() []byte { return h[100:][:8] } 247 func (h *headerV7) UID() []byte { return h[108:][:8] } 248 func (h *headerV7) GID() []byte { return h[116:][:8] } 249 func (h *headerV7) Size() []byte { return h[124:][:12] } 250 func (h *headerV7) ModTime() []byte { return h[136:][:12] } 251 func (h *headerV7) Chksum() []byte { return h[148:][:8] } 252 func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } 253 func (h *headerV7) LinkName() []byte { return h[157:][:100] } 254 255 type headerGNU [blockSize]byte 256 257 func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } 258 func (h *headerGNU) Magic() []byte { return h[257:][:6] } 259 func (h *headerGNU) Version() []byte { return h[263:][:2] } 260 func (h *headerGNU) UserName() []byte { return h[265:][:32] } 261 func (h *headerGNU) GroupName() []byte { return h[297:][:32] } 262 func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } 263 func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } 264 func 
(h *headerGNU) AccessTime() []byte { return h[345:][:12] } 265 func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } 266 func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } 267 func (h *headerGNU) RealSize() []byte { return h[483:][:12] } 268 269 type headerSTAR [blockSize]byte 270 271 func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } 272 func (h *headerSTAR) Magic() []byte { return h[257:][:6] } 273 func (h *headerSTAR) Version() []byte { return h[263:][:2] } 274 func (h *headerSTAR) UserName() []byte { return h[265:][:32] } 275 func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } 276 func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } 277 func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } 278 func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } 279 func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } 280 func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } 281 func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } 282 283 type headerUSTAR [blockSize]byte 284 285 func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } 286 func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } 287 func (h *headerUSTAR) Version() []byte { return h[263:][:2] } 288 func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } 289 func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } 290 func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } 291 func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } 292 func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } 293 294 type sparseArray []byte 295 296 func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } 297 func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } 298 func (s sparseArray) MaxEntries() int { return len(s) / 24 } 299 300 type sparseElem []byte 301 302 func (s sparseElem) Offset() []byte { return s[00:][:12] } 
303 func (s sparseElem) Length() []byte { return s[12:][:12] } 304