1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "hash" 12 "hash/crc32" 13 "io" 14 "unicode/utf8" 15 ) 16 17 var ( 18 errLongName = errors.New("zip: FileHeader.Name too long") 19 errLongExtra = errors.New("zip: FileHeader.Extra too long") 20 ) 21 22 // Writer implements a zip file writer. 23 type Writer struct { 24 cw *countWriter 25 dir []*header 26 last *fileWriter 27 closed bool 28 compressors map[uint16]Compressor 29 comment string 30 31 // testHookCloseSizeOffset if non-nil is called with the size 32 // of offset of the central directory at Close. 33 testHookCloseSizeOffset func(size, offset uint64) 34 } 35 36 type header struct { 37 *FileHeader 38 offset uint64 39 } 40 41 // NewWriter returns a new Writer writing a zip file to w. 42 func NewWriter(w io.Writer) *Writer { 43 return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} 44 } 45 46 // SetOffset sets the offset of the beginning of the zip data within the 47 // underlying writer. It should be used when the zip data is appended to an 48 // existing file, such as a binary executable. 49 // It must be called before any data is written. 50 func (w *Writer) SetOffset(n int64) { 51 if w.cw.count != 0 { 52 panic("zip: SetOffset called after data was written") 53 } 54 w.cw.count = n 55 } 56 57 // Flush flushes any buffered data to the underlying writer. 58 // Calling Flush is not normally necessary; calling Close is sufficient. 59 func (w *Writer) Flush() error { 60 return w.cw.w.(*bufio.Writer).Flush() 61 } 62 63 // SetComment sets the end-of-central-directory comment field. 64 // It can only be called before Close. 65 func (w *Writer) SetComment(comment string) error { 66 if len(comment) > uint16max { 67 return errors.New("zip: Writer.Comment too long") 68 } 69 w.comment = comment 70 return nil 71 } 72 73 // Close finishes writing the zip file by writing the central directory. 74 // It does not (and cannot) close the underlying writer. 75 func (w *Writer) Close() error { 76 if w.last != nil && !w.last.closed { 77 if err := w.last.close(); err != nil { 78 return err 79 } 80 w.last = nil 81 } 82 if w.closed { 83 return errors.New("zip: writer closed twice") 84 } 85 w.closed = true 86 87 // write central directory 88 start := w.cw.count 89 for _, h := range w.dir { 90 var buf [directoryHeaderLen]byte 91 b := writeBuf(buf[:]) 92 b.uint32(uint32(directoryHeaderSignature)) 93 b.uint16(h.CreatorVersion) 94 b.uint16(h.ReaderVersion) 95 b.uint16(h.Flags) 96 b.uint16(h.Method) 97 b.uint16(h.ModifiedTime) 98 b.uint16(h.ModifiedDate) 99 b.uint32(h.CRC32) 100 if h.isZip64() || h.offset >= uint32max { 101 // the file needs a zip64 header. store maxint in both 102 // 32 bit size fields (and offset later) to signal that the 103 // zip64 extra header should be used. 104 b.uint32(uint32max) // compressed size 105 b.uint32(uint32max) // uncompressed size 106 107 // append a zip64 extra block to Extra 108 var buf [28]byte // 2x uint16 + 3x uint64 109 eb := writeBuf(buf[:]) 110 eb.uint16(zip64ExtraID) 111 eb.uint16(24) // size = 3x uint64 112 eb.uint64(h.UncompressedSize64) 113 eb.uint64(h.CompressedSize64) 114 eb.uint64(h.offset) 115 h.Extra = append(h.Extra, buf[:]...) 116 } else { 117 b.uint32(h.CompressedSize) 118 b.uint32(h.UncompressedSize) 119 } 120 121 b.uint16(uint16(len(h.Name))) 122 b.uint16(uint16(len(h.Extra))) 123 b.uint16(uint16(len(h.Comment))) 124 b = b[4:] // skip disk number start and internal file attr (2x uint16) 125 b.uint32(h.ExternalAttrs) 126 if h.offset > uint32max { 127 b.uint32(uint32max) 128 } else { 129 b.uint32(uint32(h.offset)) 130 } 131 if _, err := w.cw.Write(buf[:]); err != nil { 132 return err 133 } 134 if _, err := io.WriteString(w.cw, h.Name); err != nil { 135 return err 136 } 137 if _, err := w.cw.Write(h.Extra); err != nil { 138 return err 139 } 140 if _, err := io.WriteString(w.cw, h.Comment); err != nil { 141 return err 142 } 143 } 144 end := w.cw.count 145 146 records := uint64(len(w.dir)) 147 size := uint64(end - start) 148 offset := uint64(start) 149 150 if f := w.testHookCloseSizeOffset; f != nil { 151 f(size, offset) 152 } 153 154 if records >= uint16max || size >= uint32max || offset >= uint32max { 155 var buf [directory64EndLen + directory64LocLen]byte 156 b := writeBuf(buf[:]) 157 158 // zip64 end of central directory record 159 b.uint32(directory64EndSignature) 160 b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) 161 b.uint16(zipVersion45) // version made by 162 b.uint16(zipVersion45) // version needed to extract 163 b.uint32(0) // number of this disk 164 b.uint32(0) // number of the disk with the start of the central directory 165 b.uint64(records) // total number of entries in the central directory on this disk 166 b.uint64(records) // total number of entries in the central directory 167 b.uint64(size) // size of the central directory 168 b.uint64(offset) // offset of start of central directory with respect to the starting disk number 169 170 // zip64 end of central directory locator 171 b.uint32(directory64LocSignature) 172 b.uint32(0) // number of the disk with the start of the zip64 end of central directory 173 b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record 174 b.uint32(1) // total number of disks 175 176 if _, err := w.cw.Write(buf[:]); err != nil { 177 return err 178 } 179 180 // store max values in the regular end record to signal that 181 // that the zip64 values should be used instead 182 records = uint16max 183 size = uint32max 184 offset = uint32max 185 } 186 187 // write end record 188 var buf [directoryEndLen]byte 189 b := writeBuf(buf[:]) 190 b.uint32(uint32(directoryEndSignature)) 191 b = b[4:] // skip over disk number and first disk number (2x uint16) 192 b.uint16(uint16(records)) // number of entries this disk 193 b.uint16(uint16(records)) // number of entries total 194 b.uint32(uint32(size)) // size of directory 195 b.uint32(uint32(offset)) // start of directory 196 b.uint16(uint16(len(w.comment))) // byte size of EOCD comment 197 if _, err := w.cw.Write(buf[:]); err != nil { 198 return err 199 } 200 if _, err := io.WriteString(w.cw, w.comment); err != nil { 201 return err 202 } 203 204 return w.cw.w.(*bufio.Writer).Flush() 205 } 206 207 // Create adds a file to the zip file using the provided name. 208 // It returns a Writer to which the file contents should be written. 209 // The file contents will be compressed using the Deflate method. 210 // The name must be a relative path: it must not start with a drive 211 // letter (e.g. C:) or leading slash, and only forward slashes are 212 // allowed. 213 // The file's contents must be written to the io.Writer before the next 214 // call to Create, CreateHeader, or Close. 215 func (w *Writer) Create(name string) (io.Writer, error) { 216 header := &FileHeader{ 217 Name: name, 218 Method: Deflate, 219 } 220 return w.CreateHeader(header) 221 } 222 223 // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string 224 // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, 225 // or any other common encoding). 226 func detectUTF8(s string) (valid, require bool) { 227 for i := 0; i < len(s); { 228 r, size := utf8.DecodeRuneInString(s[i:]) 229 i += size 230 // Officially, ZIP uses CP-437, but many readers use the system's 231 // local character encoding. Most encoding are compatible with a large 232 // subset of CP-437, which itself is ASCII-like. 233 // 234 // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those 235 // characters with localized currency and overline characters. 236 if r < 0x20 || r > 0x7d || r == 0x5c { 237 if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { 238 return false, false 239 } 240 require = true 241 } 242 } 243 return true, require 244 } 245 246 // CreateHeader adds a file to the zip archive using the provided FileHeader 247 // for the file metadata. Writer takes ownership of fh and may mutate 248 // its fields. The caller must not modify fh after calling CreateHeader. 249 // 250 // This returns a Writer to which the file contents should be written. 251 // The file's contents must be written to the io.Writer before the next 252 // call to Create, CreateHeader, or Close. 253 func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { 254 if w.last != nil && !w.last.closed { 255 if err := w.last.close(); err != nil { 256 return nil, err 257 } 258 } 259 if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { 260 // See https://golang.org/issue/11144 confusion. 261 return nil, errors.New("archive/zip: invalid duplicate FileHeader") 262 } 263 264 fh.Flags |= 0x8 // we will write a data descriptor 265 266 // The ZIP format has a sad state of affairs regarding character encoding. 267 // Officially, the name and comment fields are supposed to be encoded 268 // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8 269 // flag bit is set. However, there are several problems: 270 // 271 // * Many ZIP readers still do not support UTF-8. 272 // * If the UTF-8 flag is cleared, several readers simply interpret the 273 // name and comment fields as whatever the local system encoding is. 274 // 275 // In order to avoid breaking readers without UTF-8 support, 276 // we avoid setting the UTF-8 flag if the strings are CP-437 compatible. 277 // However, if the strings require multibyte UTF-8 encoding and is a 278 // valid UTF-8 string, then we set the UTF-8 bit. 279 // 280 // For the case, where the user explicitly wants to specify the encoding 281 // as UTF-8, they will need to set the flag bit themselves. 282 utf8Valid1, utf8Require1 := detectUTF8(fh.Name) 283 utf8Valid2, utf8Require2 := detectUTF8(fh.Comment) 284 switch { 285 case fh.NonUTF8: 286 fh.Flags &^= 0x800 287 case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2): 288 fh.Flags |= 0x800 289 } 290 291 fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte 292 fh.ReaderVersion = zipVersion20 293 294 // If Modified is set, this takes precedence over MS-DOS timestamp fields. 295 if !fh.Modified.IsZero() { 296 // Contrary to the FileHeader.SetModTime method, we intentionally 297 // do not convert to UTC, because we assume the user intends to encode 298 // the date using the specified timezone. A user may want this control 299 // because many legacy ZIP readers interpret the timestamp according 300 // to the local timezone. 301 // 302 // The timezone is only non-UTC if a user directly sets the Modified 303 // field directly themselves. All other approaches sets UTC. 304 fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified) 305 306 // Use "extended timestamp" format since this is what Info-ZIP uses. 307 // Nearly every major ZIP implementation uses a different format, 308 // but at least most seem to be able to understand the other formats. 309 // 310 // This format happens to be identical for both local and central header 311 // if modification time is the only timestamp being encoded. 312 var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32) 313 mt := uint32(fh.Modified.Unix()) 314 eb := writeBuf(mbuf[:]) 315 eb.uint16(extTimeExtraID) 316 eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32) 317 eb.uint8(1) // Flags: ModTime 318 eb.uint32(mt) // ModTime 319 fh.Extra = append(fh.Extra, mbuf[:]...) 320 } 321 322 fw := &fileWriter{ 323 zipw: w.cw, 324 compCount: &countWriter{w: w.cw}, 325 crc32: crc32.NewIEEE(), 326 } 327 comp := w.compressor(fh.Method) 328 if comp == nil { 329 return nil, ErrAlgorithm 330 } 331 var err error 332 fw.comp, err = comp(fw.compCount) 333 if err != nil { 334 return nil, err 335 } 336 fw.rawCount = &countWriter{w: fw.comp} 337 338 h := &header{ 339 FileHeader: fh, 340 offset: uint64(w.cw.count), 341 } 342 w.dir = append(w.dir, h) 343 fw.header = h 344 345 if err := writeHeader(w.cw, fh); err != nil { 346 return nil, err 347 } 348 349 w.last = fw 350 return fw, nil 351 } 352 353 func writeHeader(w io.Writer, h *FileHeader) error { 354 const maxUint16 = 1<<16 - 1 355 if len(h.Name) > maxUint16 { 356 return errLongName 357 } 358 if len(h.Extra) > maxUint16 { 359 return errLongExtra 360 } 361 362 var buf [fileHeaderLen]byte 363 b := writeBuf(buf[:]) 364 b.uint32(uint32(fileHeaderSignature)) 365 b.uint16(h.ReaderVersion) 366 b.uint16(h.Flags) 367 b.uint16(h.Method) 368 b.uint16(h.ModifiedTime) 369 b.uint16(h.ModifiedDate) 370 b.uint32(0) // since we are writing a data descriptor crc32, 371 b.uint32(0) // compressed size, 372 b.uint32(0) // and uncompressed size should be zero 373 b.uint16(uint16(len(h.Name))) 374 b.uint16(uint16(len(h.Extra))) 375 if _, err := w.Write(buf[:]); err != nil { 376 return err 377 } 378 if _, err := io.WriteString(w, h.Name); err != nil { 379 return err 380 } 381 _, err := w.Write(h.Extra) 382 return err 383 } 384 385 // RegisterCompressor registers or overrides a custom compressor for a specific 386 // method ID. If a compressor for a given method is not found, Writer will 387 // default to looking up the compressor at the package level. 388 func (w *Writer) RegisterCompressor(method uint16, comp Compressor) { 389 if w.compressors == nil { 390 w.compressors = make(map[uint16]Compressor) 391 } 392 w.compressors[method] = comp 393 } 394 395 func (w *Writer) compressor(method uint16) Compressor { 396 comp := w.compressors[method] 397 if comp == nil { 398 comp = compressor(method) 399 } 400 return comp 401 } 402 403 type fileWriter struct { 404 *header 405 zipw io.Writer 406 rawCount *countWriter 407 comp io.WriteCloser 408 compCount *countWriter 409 crc32 hash.Hash32 410 closed bool 411 } 412 413 func (w *fileWriter) Write(p []byte) (int, error) { 414 if w.closed { 415 return 0, errors.New("zip: write to closed file") 416 } 417 w.crc32.Write(p) 418 return w.rawCount.Write(p) 419 } 420 421 func (w *fileWriter) close() error { 422 if w.closed { 423 return errors.New("zip: file closed twice") 424 } 425 w.closed = true 426 if err := w.comp.Close(); err != nil { 427 return err 428 } 429 430 // update FileHeader 431 fh := w.header.FileHeader 432 fh.CRC32 = w.crc32.Sum32() 433 fh.CompressedSize64 = uint64(w.compCount.count) 434 fh.UncompressedSize64 = uint64(w.rawCount.count) 435 436 if fh.isZip64() { 437 fh.CompressedSize = uint32max 438 fh.UncompressedSize = uint32max 439 fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions 440 } else { 441 fh.CompressedSize = uint32(fh.CompressedSize64) 442 fh.UncompressedSize = uint32(fh.UncompressedSize64) 443 } 444 445 // Write data descriptor. This is more complicated than one would 446 // think, see e.g. comments in zipfile.c:putextended() and 447 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. 448 // The approach here is to write 8 byte sizes if needed without 449 // adding a zip64 extra in the local header (too late anyway). 450 var buf []byte 451 if fh.isZip64() { 452 buf = make([]byte, dataDescriptor64Len) 453 } else { 454 buf = make([]byte, dataDescriptorLen) 455 } 456 b := writeBuf(buf) 457 b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X 458 b.uint32(fh.CRC32) 459 if fh.isZip64() { 460 b.uint64(fh.CompressedSize64) 461 b.uint64(fh.UncompressedSize64) 462 } else { 463 b.uint32(fh.CompressedSize) 464 b.uint32(fh.UncompressedSize) 465 } 466 _, err := w.zipw.Write(buf) 467 return err 468 } 469 470 type countWriter struct { 471 w io.Writer 472 count int64 473 } 474 475 func (w *countWriter) Write(p []byte) (int, error) { 476 n, err := w.w.Write(p) 477 w.count += int64(n) 478 return n, err 479 } 480 481 type nopCloser struct { 482 io.Writer 483 } 484 485 func (w nopCloser) Close() error { 486 return nil 487 } 488 489 type writeBuf []byte 490 491 func (b *writeBuf) uint8(v uint8) { 492 (*b)[0] = v 493 *b = (*b)[1:] 494 } 495 496 func (b *writeBuf) uint16(v uint16) { 497 binary.LittleEndian.PutUint16(*b, v) 498 *b = (*b)[2:] 499 } 500 501 func (b *writeBuf) uint32(v uint32) { 502 binary.LittleEndian.PutUint32(*b, v) 503 *b = (*b)[4:] 504 } 505 506 func (b *writeBuf) uint64(v uint64) { 507 binary.LittleEndian.PutUint64(*b, v) 508 *b = (*b)[8:] 509 } 510