Home | History | Annotate | Download | only in gzip
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package gzip
      6 
      7 import (
      8 	"compress/flate"
      9 	"errors"
     10 	"fmt"
     11 	"hash/crc32"
     12 	"io"
     13 	"time"
     14 )
     15 
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
//
// They are the named compression levels accepted by NewWriterLevel.
const (
	NoCompression      = flate.NoCompression
	BestSpeed          = flate.BestSpeed
	BestCompression    = flate.BestCompression
	DefaultCompression = flate.DefaultCompression
	HuffmanOnly        = flate.HuffmanOnly
)
     25 
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
//
// The gzip header is emitted lazily by the first call to Write, Flush, or
// Close; the trailer (CRC-32 and uncompressed size) is emitted by Close.
// Once any operation fails, the error is stored in err and returned by
// every later call to Write, Flush, or Close.
type Writer struct {
	Header      // written at first call to Write, Flush, or Close
	// w receives the header, the compressed stream, and the trailer.
	w           io.Writer
	// level is the compression level handed to flate.NewWriter.
	level       int
	// wroteHeader is set by Write as soon as header emission begins.
	wroteHeader bool
	// compressor is created on the first Write and kept across Reset.
	compressor  *flate.Writer
	digest      uint32 // CRC-32, IEEE polynomial (section 8)
	size        uint32 // Uncompressed size (section 2.3.1)
	// closed is set by Close; later Flush and Close calls become no-ops.
	closed      bool
	// buf is scratch space for the fixed header, length prefixes, the
	// string terminator, and the trailer.
	buf         [10]byte
	// err is the first error encountered; it is never cleared except by Reset.
	err         error
}
     40 
     41 // NewWriter returns a new Writer.
     42 // Writes to the returned writer are compressed and written to w.
     43 //
     44 // It is the caller's responsibility to call Close on the WriteCloser when done.
     45 // Writes may be buffered and not flushed until Close.
     46 //
     47 // Callers that wish to set the fields in Writer.Header must do so before
     48 // the first call to Write, Flush, or Close.
     49 func NewWriter(w io.Writer) *Writer {
     50 	z, _ := NewWriterLevel(w, DefaultCompression)
     51 	return z
     52 }
     53 
     54 // NewWriterLevel is like NewWriter but specifies the compression level instead
     55 // of assuming DefaultCompression.
     56 //
     57 // The compression level can be DefaultCompression, NoCompression, HuffmanOnly
     58 // or any integer value between BestSpeed and BestCompression inclusive.
     59 // The error returned will be nil if the level is valid.
     60 func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
     61 	if level < HuffmanOnly || level > BestCompression {
     62 		return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
     63 	}
     64 	z := new(Writer)
     65 	z.init(w, level)
     66 	return z, nil
     67 }
     68 
     69 func (z *Writer) init(w io.Writer, level int) {
     70 	compressor := z.compressor
     71 	if compressor != nil {
     72 		compressor.Reset(w)
     73 	}
     74 	*z = Writer{
     75 		Header: Header{
     76 			OS: 255, // unknown
     77 		},
     78 		w:          w,
     79 		level:      level,
     80 		compressor: compressor,
     81 	}
     82 }
     83 
     84 // Reset discards the Writer z's state and makes it equivalent to the
     85 // result of its original state from NewWriter or NewWriterLevel, but
     86 // writing to w instead. This permits reusing a Writer rather than
     87 // allocating a new one.
     88 func (z *Writer) Reset(w io.Writer) {
     89 	z.init(w, z.level)
     90 }
     91 
     92 // writeBytes writes a length-prefixed byte slice to z.w.
     93 func (z *Writer) writeBytes(b []byte) error {
     94 	if len(b) > 0xffff {
     95 		return errors.New("gzip.Write: Extra data is too large")
     96 	}
     97 	le.PutUint16(z.buf[:2], uint16(len(b)))
     98 	_, err := z.w.Write(z.buf[:2])
     99 	if err != nil {
    100 		return err
    101 	}
    102 	_, err = z.w.Write(b)
    103 	return err
    104 }
    105 
    106 // writeString writes a UTF-8 string s in GZIP's format to z.w.
    107 // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
    108 func (z *Writer) writeString(s string) (err error) {
    109 	// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
    110 	needconv := false
    111 	for _, v := range s {
    112 		if v == 0 || v > 0xff {
    113 			return errors.New("gzip.Write: non-Latin-1 header string")
    114 		}
    115 		if v > 0x7f {
    116 			needconv = true
    117 		}
    118 	}
    119 	if needconv {
    120 		b := make([]byte, 0, len(s))
    121 		for _, v := range s {
    122 			b = append(b, byte(v))
    123 		}
    124 		_, err = z.w.Write(b)
    125 	} else {
    126 		_, err = io.WriteString(z.w, s)
    127 	}
    128 	if err != nil {
    129 		return err
    130 	}
    131 	// GZIP strings are NUL-terminated.
    132 	z.buf[0] = 0
    133 	_, err = z.w.Write(z.buf[:1])
    134 	return err
    135 }
    136 
    137 // Write writes a compressed form of p to the underlying io.Writer. The
    138 // compressed bytes are not necessarily flushed until the Writer is closed.
    139 func (z *Writer) Write(p []byte) (int, error) {
    140 	if z.err != nil {
    141 		return 0, z.err
    142 	}
    143 	var n int
    144 	// Write the GZIP header lazily.
    145 	if !z.wroteHeader {
    146 		z.wroteHeader = true
    147 		z.buf = [10]byte{0: gzipID1, 1: gzipID2, 2: gzipDeflate}
    148 		if z.Extra != nil {
    149 			z.buf[3] |= 0x04
    150 		}
    151 		if z.Name != "" {
    152 			z.buf[3] |= 0x08
    153 		}
    154 		if z.Comment != "" {
    155 			z.buf[3] |= 0x10
    156 		}
    157 		if z.ModTime.After(time.Unix(0, 0)) {
    158 			// Section 2.3.1, the zero value for MTIME means that the
    159 			// modified time is not set.
    160 			le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
    161 		}
    162 		if z.level == BestCompression {
    163 			z.buf[8] = 2
    164 		} else if z.level == BestSpeed {
    165 			z.buf[8] = 4
    166 		}
    167 		z.buf[9] = z.OS
    168 		n, z.err = z.w.Write(z.buf[:10])
    169 		if z.err != nil {
    170 			return n, z.err
    171 		}
    172 		if z.Extra != nil {
    173 			z.err = z.writeBytes(z.Extra)
    174 			if z.err != nil {
    175 				return n, z.err
    176 			}
    177 		}
    178 		if z.Name != "" {
    179 			z.err = z.writeString(z.Name)
    180 			if z.err != nil {
    181 				return n, z.err
    182 			}
    183 		}
    184 		if z.Comment != "" {
    185 			z.err = z.writeString(z.Comment)
    186 			if z.err != nil {
    187 				return n, z.err
    188 			}
    189 		}
    190 		if z.compressor == nil {
    191 			z.compressor, _ = flate.NewWriter(z.w, z.level)
    192 		}
    193 	}
    194 	z.size += uint32(len(p))
    195 	z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
    196 	n, z.err = z.compressor.Write(p)
    197 	return n, z.err
    198 }
    199 
    200 // Flush flushes any pending compressed data to the underlying writer.
    201 //
    202 // It is useful mainly in compressed network protocols, to ensure that
    203 // a remote reader has enough data to reconstruct a packet. Flush does
    204 // not return until the data has been written. If the underlying
    205 // writer returns an error, Flush returns that error.
    206 //
    207 // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
    208 func (z *Writer) Flush() error {
    209 	if z.err != nil {
    210 		return z.err
    211 	}
    212 	if z.closed {
    213 		return nil
    214 	}
    215 	if !z.wroteHeader {
    216 		z.Write(nil)
    217 		if z.err != nil {
    218 			return z.err
    219 		}
    220 	}
    221 	z.err = z.compressor.Flush()
    222 	return z.err
    223 }
    224 
    225 // Close closes the Writer, flushing any unwritten data to the underlying
    226 // io.Writer, but does not close the underlying io.Writer.
    227 func (z *Writer) Close() error {
    228 	if z.err != nil {
    229 		return z.err
    230 	}
    231 	if z.closed {
    232 		return nil
    233 	}
    234 	z.closed = true
    235 	if !z.wroteHeader {
    236 		z.Write(nil)
    237 		if z.err != nil {
    238 			return z.err
    239 		}
    240 	}
    241 	z.err = z.compressor.Close()
    242 	if z.err != nil {
    243 		return z.err
    244 	}
    245 	le.PutUint32(z.buf[:4], z.digest)
    246 	le.PutUint32(z.buf[4:8], z.size)
    247 	_, z.err = z.w.Write(z.buf[:8])
    248 	return z.err
    249 }
    250