Home | History | Annotate | Download | only in zip
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package zip
      6 
      7 import (
      8 	"bufio"
      9 	"encoding/binary"
     10 	"errors"
     11 	"hash"
     12 	"hash/crc32"
     13 	"io"
     14 	"unicode/utf8"
     15 )
     16 
// Errors returned by writeHeader when a FileHeader field is too long for
// the 16-bit length field that describes it in the local file header.
var (
	errLongName  = errors.New("zip: FileHeader.Name too long")
	errLongExtra = errors.New("zip: FileHeader.Extra too long")
)
     21 
// Writer implements a zip file writer.
type Writer struct {
	cw          *countWriter          // buffered, byte-counting wrapper around the destination (see NewWriter)
	dir         []*header             // entries recorded by CreateHeader; Close writes them as the central directory
	last        *fileWriter           // entry writer returned by the most recent successful CreateHeader
	closed      bool                  // set by Close before it starts writing the central directory
	compressors map[uint16]Compressor // per-Writer compressors; allocated on first RegisterCompressor
	comment     string                // end-of-central-directory comment, set via SetComment

	// testHookCloseSizeOffset if non-nil is called with the size
	// and offset of the central directory at Close.
	testHookCloseSizeOffset func(size, offset uint64)
}
     35 
// header pairs a FileHeader with the position recorded for its entry.
type header struct {
	*FileHeader
	offset uint64 // the Writer's byte count at the time CreateHeader built the entry
}
     40 
     41 // NewWriter returns a new Writer writing a zip file to w.
     42 func NewWriter(w io.Writer) *Writer {
     43 	return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
     44 }
     45 
     46 // SetOffset sets the offset of the beginning of the zip data within the
     47 // underlying writer. It should be used when the zip data is appended to an
     48 // existing file, such as a binary executable.
     49 // It must be called before any data is written.
     50 func (w *Writer) SetOffset(n int64) {
     51 	if w.cw.count != 0 {
     52 		panic("zip: SetOffset called after data was written")
     53 	}
     54 	w.cw.count = n
     55 }
     56 
     57 // Flush flushes any buffered data to the underlying writer.
     58 // Calling Flush is not normally necessary; calling Close is sufficient.
     59 func (w *Writer) Flush() error {
     60 	return w.cw.w.(*bufio.Writer).Flush()
     61 }
     62 
     63 // SetComment sets the end-of-central-directory comment field.
     64 // It can only be called before Close.
     65 func (w *Writer) SetComment(comment string) error {
     66 	if len(comment) > uint16max {
     67 		return errors.New("zip: Writer.Comment too long")
     68 	}
     69 	w.comment = comment
     70 	return nil
     71 }
     72 
     73 // Close finishes writing the zip file by writing the central directory.
     74 // It does not (and cannot) close the underlying writer.
     75 func (w *Writer) Close() error {
     76 	if w.last != nil && !w.last.closed {
     77 		if err := w.last.close(); err != nil {
     78 			return err
     79 		}
     80 		w.last = nil
     81 	}
     82 	if w.closed {
     83 		return errors.New("zip: writer closed twice")
     84 	}
     85 	w.closed = true
     86 
     87 	// write central directory
     88 	start := w.cw.count
     89 	for _, h := range w.dir {
     90 		var buf [directoryHeaderLen]byte
     91 		b := writeBuf(buf[:])
     92 		b.uint32(uint32(directoryHeaderSignature))
     93 		b.uint16(h.CreatorVersion)
     94 		b.uint16(h.ReaderVersion)
     95 		b.uint16(h.Flags)
     96 		b.uint16(h.Method)
     97 		b.uint16(h.ModifiedTime)
     98 		b.uint16(h.ModifiedDate)
     99 		b.uint32(h.CRC32)
    100 		if h.isZip64() || h.offset >= uint32max {
    101 			// the file needs a zip64 header. store maxint in both
    102 			// 32 bit size fields (and offset later) to signal that the
    103 			// zip64 extra header should be used.
    104 			b.uint32(uint32max) // compressed size
    105 			b.uint32(uint32max) // uncompressed size
    106 
    107 			// append a zip64 extra block to Extra
    108 			var buf [28]byte // 2x uint16 + 3x uint64
    109 			eb := writeBuf(buf[:])
    110 			eb.uint16(zip64ExtraID)
    111 			eb.uint16(24) // size = 3x uint64
    112 			eb.uint64(h.UncompressedSize64)
    113 			eb.uint64(h.CompressedSize64)
    114 			eb.uint64(h.offset)
    115 			h.Extra = append(h.Extra, buf[:]...)
    116 		} else {
    117 			b.uint32(h.CompressedSize)
    118 			b.uint32(h.UncompressedSize)
    119 		}
    120 
    121 		b.uint16(uint16(len(h.Name)))
    122 		b.uint16(uint16(len(h.Extra)))
    123 		b.uint16(uint16(len(h.Comment)))
    124 		b = b[4:] // skip disk number start and internal file attr (2x uint16)
    125 		b.uint32(h.ExternalAttrs)
    126 		if h.offset > uint32max {
    127 			b.uint32(uint32max)
    128 		} else {
    129 			b.uint32(uint32(h.offset))
    130 		}
    131 		if _, err := w.cw.Write(buf[:]); err != nil {
    132 			return err
    133 		}
    134 		if _, err := io.WriteString(w.cw, h.Name); err != nil {
    135 			return err
    136 		}
    137 		if _, err := w.cw.Write(h.Extra); err != nil {
    138 			return err
    139 		}
    140 		if _, err := io.WriteString(w.cw, h.Comment); err != nil {
    141 			return err
    142 		}
    143 	}
    144 	end := w.cw.count
    145 
    146 	records := uint64(len(w.dir))
    147 	size := uint64(end - start)
    148 	offset := uint64(start)
    149 
    150 	if f := w.testHookCloseSizeOffset; f != nil {
    151 		f(size, offset)
    152 	}
    153 
    154 	if records >= uint16max || size >= uint32max || offset >= uint32max {
    155 		var buf [directory64EndLen + directory64LocLen]byte
    156 		b := writeBuf(buf[:])
    157 
    158 		// zip64 end of central directory record
    159 		b.uint32(directory64EndSignature)
    160 		b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
    161 		b.uint16(zipVersion45)           // version made by
    162 		b.uint16(zipVersion45)           // version needed to extract
    163 		b.uint32(0)                      // number of this disk
    164 		b.uint32(0)                      // number of the disk with the start of the central directory
    165 		b.uint64(records)                // total number of entries in the central directory on this disk
    166 		b.uint64(records)                // total number of entries in the central directory
    167 		b.uint64(size)                   // size of the central directory
    168 		b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number
    169 
    170 		// zip64 end of central directory locator
    171 		b.uint32(directory64LocSignature)
    172 		b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
    173 		b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
    174 		b.uint32(1)           // total number of disks
    175 
    176 		if _, err := w.cw.Write(buf[:]); err != nil {
    177 			return err
    178 		}
    179 
    180 		// store max values in the regular end record to signal that
    181 		// that the zip64 values should be used instead
    182 		records = uint16max
    183 		size = uint32max
    184 		offset = uint32max
    185 	}
    186 
    187 	// write end record
    188 	var buf [directoryEndLen]byte
    189 	b := writeBuf(buf[:])
    190 	b.uint32(uint32(directoryEndSignature))
    191 	b = b[4:]                        // skip over disk number and first disk number (2x uint16)
    192 	b.uint16(uint16(records))        // number of entries this disk
    193 	b.uint16(uint16(records))        // number of entries total
    194 	b.uint32(uint32(size))           // size of directory
    195 	b.uint32(uint32(offset))         // start of directory
    196 	b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
    197 	if _, err := w.cw.Write(buf[:]); err != nil {
    198 		return err
    199 	}
    200 	if _, err := io.WriteString(w.cw, w.comment); err != nil {
    201 		return err
    202 	}
    203 
    204 	return w.cw.w.(*bufio.Writer).Flush()
    205 }
    206 
    207 // Create adds a file to the zip file using the provided name.
    208 // It returns a Writer to which the file contents should be written.
    209 // The file contents will be compressed using the Deflate method.
    210 // The name must be a relative path: it must not start with a drive
    211 // letter (e.g. C:) or leading slash, and only forward slashes are
    212 // allowed.
    213 // The file's contents must be written to the io.Writer before the next
    214 // call to Create, CreateHeader, or Close.
    215 func (w *Writer) Create(name string) (io.Writer, error) {
    216 	header := &FileHeader{
    217 		Name:   name,
    218 		Method: Deflate,
    219 	}
    220 	return w.CreateHeader(header)
    221 }
    222 
    223 // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
    224 // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
    225 // or any other common encoding).
    226 func detectUTF8(s string) (valid, require bool) {
    227 	for i := 0; i < len(s); {
    228 		r, size := utf8.DecodeRuneInString(s[i:])
    229 		i += size
    230 		// Officially, ZIP uses CP-437, but many readers use the system's
    231 		// local character encoding. Most encoding are compatible with a large
    232 		// subset of CP-437, which itself is ASCII-like.
    233 		//
    234 		// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
    235 		// characters with localized currency and overline characters.
    236 		if r < 0x20 || r > 0x7d || r == 0x5c {
    237 			if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
    238 				return false, false
    239 			}
    240 			require = true
    241 		}
    242 	}
    243 	return true, require
    244 }
    245 
    246 // CreateHeader adds a file to the zip archive using the provided FileHeader
    247 // for the file metadata. Writer takes ownership of fh and may mutate
    248 // its fields. The caller must not modify fh after calling CreateHeader.
    249 //
    250 // This returns a Writer to which the file contents should be written.
    251 // The file's contents must be written to the io.Writer before the next
    252 // call to Create, CreateHeader, or Close.
    253 func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
    254 	if w.last != nil && !w.last.closed {
    255 		if err := w.last.close(); err != nil {
    256 			return nil, err
    257 		}
    258 	}
    259 	if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
    260 		// See https://golang.org/issue/11144 confusion.
    261 		return nil, errors.New("archive/zip: invalid duplicate FileHeader")
    262 	}
    263 
    264 	fh.Flags |= 0x8 // we will write a data descriptor
    265 
    266 	// The ZIP format has a sad state of affairs regarding character encoding.
    267 	// Officially, the name and comment fields are supposed to be encoded
    268 	// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
    269 	// flag bit is set. However, there are several problems:
    270 	//
    271 	//	* Many ZIP readers still do not support UTF-8.
    272 	//	* If the UTF-8 flag is cleared, several readers simply interpret the
    273 	//	name and comment fields as whatever the local system encoding is.
    274 	//
    275 	// In order to avoid breaking readers without UTF-8 support,
    276 	// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
    277 	// However, if the strings require multibyte UTF-8 encoding and is a
    278 	// valid UTF-8 string, then we set the UTF-8 bit.
    279 	//
    280 	// For the case, where the user explicitly wants to specify the encoding
    281 	// as UTF-8, they will need to set the flag bit themselves.
    282 	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
    283 	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
    284 	switch {
    285 	case fh.NonUTF8:
    286 		fh.Flags &^= 0x800
    287 	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
    288 		fh.Flags |= 0x800
    289 	}
    290 
    291 	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
    292 	fh.ReaderVersion = zipVersion20
    293 
    294 	// If Modified is set, this takes precedence over MS-DOS timestamp fields.
    295 	if !fh.Modified.IsZero() {
    296 		// Contrary to the FileHeader.SetModTime method, we intentionally
    297 		// do not convert to UTC, because we assume the user intends to encode
    298 		// the date using the specified timezone. A user may want this control
    299 		// because many legacy ZIP readers interpret the timestamp according
    300 		// to the local timezone.
    301 		//
    302 		// The timezone is only non-UTC if a user directly sets the Modified
    303 		// field directly themselves. All other approaches sets UTC.
    304 		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
    305 
    306 		// Use "extended timestamp" format since this is what Info-ZIP uses.
    307 		// Nearly every major ZIP implementation uses a different format,
    308 		// but at least most seem to be able to understand the other formats.
    309 		//
    310 		// This format happens to be identical for both local and central header
    311 		// if modification time is the only timestamp being encoded.
    312 		var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
    313 		mt := uint32(fh.Modified.Unix())
    314 		eb := writeBuf(mbuf[:])
    315 		eb.uint16(extTimeExtraID)
    316 		eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
    317 		eb.uint8(1)   // Flags: ModTime
    318 		eb.uint32(mt) // ModTime
    319 		fh.Extra = append(fh.Extra, mbuf[:]...)
    320 	}
    321 
    322 	fw := &fileWriter{
    323 		zipw:      w.cw,
    324 		compCount: &countWriter{w: w.cw},
    325 		crc32:     crc32.NewIEEE(),
    326 	}
    327 	comp := w.compressor(fh.Method)
    328 	if comp == nil {
    329 		return nil, ErrAlgorithm
    330 	}
    331 	var err error
    332 	fw.comp, err = comp(fw.compCount)
    333 	if err != nil {
    334 		return nil, err
    335 	}
    336 	fw.rawCount = &countWriter{w: fw.comp}
    337 
    338 	h := &header{
    339 		FileHeader: fh,
    340 		offset:     uint64(w.cw.count),
    341 	}
    342 	w.dir = append(w.dir, h)
    343 	fw.header = h
    344 
    345 	if err := writeHeader(w.cw, fh); err != nil {
    346 		return nil, err
    347 	}
    348 
    349 	w.last = fw
    350 	return fw, nil
    351 }
    352 
    353 func writeHeader(w io.Writer, h *FileHeader) error {
    354 	const maxUint16 = 1<<16 - 1
    355 	if len(h.Name) > maxUint16 {
    356 		return errLongName
    357 	}
    358 	if len(h.Extra) > maxUint16 {
    359 		return errLongExtra
    360 	}
    361 
    362 	var buf [fileHeaderLen]byte
    363 	b := writeBuf(buf[:])
    364 	b.uint32(uint32(fileHeaderSignature))
    365 	b.uint16(h.ReaderVersion)
    366 	b.uint16(h.Flags)
    367 	b.uint16(h.Method)
    368 	b.uint16(h.ModifiedTime)
    369 	b.uint16(h.ModifiedDate)
    370 	b.uint32(0) // since we are writing a data descriptor crc32,
    371 	b.uint32(0) // compressed size,
    372 	b.uint32(0) // and uncompressed size should be zero
    373 	b.uint16(uint16(len(h.Name)))
    374 	b.uint16(uint16(len(h.Extra)))
    375 	if _, err := w.Write(buf[:]); err != nil {
    376 		return err
    377 	}
    378 	if _, err := io.WriteString(w, h.Name); err != nil {
    379 		return err
    380 	}
    381 	_, err := w.Write(h.Extra)
    382 	return err
    383 }
    384 
    385 // RegisterCompressor registers or overrides a custom compressor for a specific
    386 // method ID. If a compressor for a given method is not found, Writer will
    387 // default to looking up the compressor at the package level.
    388 func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
    389 	if w.compressors == nil {
    390 		w.compressors = make(map[uint16]Compressor)
    391 	}
    392 	w.compressors[method] = comp
    393 }
    394 
    395 func (w *Writer) compressor(method uint16) Compressor {
    396 	comp := w.compressors[method]
    397 	if comp == nil {
    398 		comp = compressor(method)
    399 	}
    400 	return comp
    401 }
    402 
// fileWriter is the io.Writer that CreateHeader returns for one archive
// entry. Data written to it is checksummed, counted, compressed, counted
// again and then passed on to the archive.
type fileWriter struct {
	*header
	zipw      io.Writer      // the Writer's counting writer; close writes the data descriptor to it
	rawCount  *countWriter   // counts uncompressed bytes on their way into comp
	comp      io.WriteCloser // compressor instance created for this entry's method
	compCount *countWriter   // counts the bytes comp emits into the archive
	crc32     hash.Hash32    // running CRC-32 of the data passed to Write
	closed    bool           // set by close; Write then returns an error
}
    412 
    413 func (w *fileWriter) Write(p []byte) (int, error) {
    414 	if w.closed {
    415 		return 0, errors.New("zip: write to closed file")
    416 	}
    417 	w.crc32.Write(p)
    418 	return w.rawCount.Write(p)
    419 }
    420 
    421 func (w *fileWriter) close() error {
    422 	if w.closed {
    423 		return errors.New("zip: file closed twice")
    424 	}
    425 	w.closed = true
    426 	if err := w.comp.Close(); err != nil {
    427 		return err
    428 	}
    429 
    430 	// update FileHeader
    431 	fh := w.header.FileHeader
    432 	fh.CRC32 = w.crc32.Sum32()
    433 	fh.CompressedSize64 = uint64(w.compCount.count)
    434 	fh.UncompressedSize64 = uint64(w.rawCount.count)
    435 
    436 	if fh.isZip64() {
    437 		fh.CompressedSize = uint32max
    438 		fh.UncompressedSize = uint32max
    439 		fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
    440 	} else {
    441 		fh.CompressedSize = uint32(fh.CompressedSize64)
    442 		fh.UncompressedSize = uint32(fh.UncompressedSize64)
    443 	}
    444 
    445 	// Write data descriptor. This is more complicated than one would
    446 	// think, see e.g. comments in zipfile.c:putextended() and
    447 	// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
    448 	// The approach here is to write 8 byte sizes if needed without
    449 	// adding a zip64 extra in the local header (too late anyway).
    450 	var buf []byte
    451 	if fh.isZip64() {
    452 		buf = make([]byte, dataDescriptor64Len)
    453 	} else {
    454 		buf = make([]byte, dataDescriptorLen)
    455 	}
    456 	b := writeBuf(buf)
    457 	b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
    458 	b.uint32(fh.CRC32)
    459 	if fh.isZip64() {
    460 		b.uint64(fh.CompressedSize64)
    461 		b.uint64(fh.UncompressedSize64)
    462 	} else {
    463 		b.uint32(fh.CompressedSize)
    464 		b.uint32(fh.UncompressedSize)
    465 	}
    466 	_, err := w.zipw.Write(buf)
    467 	return err
    468 }
    469 
    470 type countWriter struct {
    471 	w     io.Writer
    472 	count int64
    473 }
    474 
    475 func (w *countWriter) Write(p []byte) (int, error) {
    476 	n, err := w.w.Write(p)
    477 	w.count += int64(n)
    478 	return n, err
    479 }
    480 
    481 type nopCloser struct {
    482 	io.Writer
    483 }
    484 
    485 func (w nopCloser) Close() error {
    486 	return nil
    487 }
    488 
    489 type writeBuf []byte
    490 
    491 func (b *writeBuf) uint8(v uint8) {
    492 	(*b)[0] = v
    493 	*b = (*b)[1:]
    494 }
    495 
    496 func (b *writeBuf) uint16(v uint16) {
    497 	binary.LittleEndian.PutUint16(*b, v)
    498 	*b = (*b)[2:]
    499 }
    500 
    501 func (b *writeBuf) uint32(v uint32) {
    502 	binary.LittleEndian.PutUint32(*b, v)
    503 	*b = (*b)[4:]
    504 }
    505 
    506 func (b *writeBuf) uint64(v uint64) {
    507 	binary.LittleEndian.PutUint64(*b, v)
    508 	*b = (*b)[8:]
    509 }
    510