Home | History | Annotate | Download | only in hpack
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Package hpack implements HPACK, a compression format for
      6 // efficiently representing HTTP header fields in the context of HTTP/2.
      7 //
      8 // See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
      9 package hpack
     10 
     11 import (
     12 	"bytes"
     13 	"errors"
     14 	"fmt"
     15 )
     16 
     17 // A DecodingError is something the spec defines as a decoding error.
     18 type DecodingError struct {
     19 	Err error
     20 }
     21 
     22 func (de DecodingError) Error() string {
     23 	return fmt.Sprintf("decoding error: %v", de.Err)
     24 }
     25 
     26 // An InvalidIndexError is returned when an encoder references a table
     27 // entry before the static table or after the end of the dynamic table.
     28 type InvalidIndexError int
     29 
     30 func (e InvalidIndexError) Error() string {
     31 	return fmt.Sprintf("invalid indexed representation index %d", int(e))
     32 }
     33 
     34 // A HeaderField is a name-value pair. Both the name and value are
     35 // treated as opaque sequences of octets.
     36 type HeaderField struct {
     37 	Name, Value string
     38 
     39 	// Sensitive means that this header field should never be
     40 	// indexed.
     41 	Sensitive bool
     42 }
     43 
     44 // IsPseudo reports whether the header field is an http2 pseudo header.
     45 // That is, it reports whether it starts with a colon.
     46 // It is not otherwise guaranteed to be a valid pseudo header field,
     47 // though.
     48 func (hf HeaderField) IsPseudo() bool {
     49 	return len(hf.Name) != 0 && hf.Name[0] == ':'
     50 }
     51 
     52 func (hf HeaderField) String() string {
     53 	var suffix string
     54 	if hf.Sensitive {
     55 		suffix = " (sensitive)"
     56 	}
     57 	return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix)
     58 }
     59 
     60 // Size returns the size of an entry per RFC 7541 section 4.1.
     61 func (hf HeaderField) Size() uint32 {
     62 	// http://http2.github.io/http2-spec/compression.html#rfc.section.4.1
     63 	// "The size of the dynamic table is the sum of the size of
     64 	// its entries. The size of an entry is the sum of its name's
     65 	// length in octets (as defined in Section 5.2), its value's
     66 	// length in octets (see Section 5.2), plus 32.  The size of
     67 	// an entry is calculated using the length of the name and
     68 	// value without any Huffman encoding applied."
     69 
     70 	// This can overflow if somebody makes a large HeaderField
     71 	// Name and/or Value by hand, but we don't care, because that
     72 	// won't happen on the wire because the encoding doesn't allow
     73 	// it.
     74 	return uint32(len(hf.Name) + len(hf.Value) + 32)
     75 }
     76 
     77 // A Decoder is the decoding context for incremental processing of
     78 // header blocks.
     79 type Decoder struct {
     80 	dynTab dynamicTable
     81 	emit   func(f HeaderField)
     82 
     83 	emitEnabled bool // whether calls to emit are enabled
     84 	maxStrLen   int  // 0 means unlimited
     85 
     86 	// buf is the unparsed buffer. It's only written to
     87 	// saveBuf if it was truncated in the middle of a header
     88 	// block. Because it's usually not owned, we can only
     89 	// process it under Write.
     90 	buf []byte // not owned; only valid during Write
     91 
     92 	// saveBuf is previous data passed to Write which we weren't able
     93 	// to fully parse before. Unlike buf, we own this data.
     94 	saveBuf bytes.Buffer
     95 }
     96 
     97 // NewDecoder returns a new decoder with the provided maximum dynamic
     98 // table size. The emitFunc will be called for each valid field
     99 // parsed, in the same goroutine as calls to Write, before Write returns.
    100 func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
    101 	d := &Decoder{
    102 		emit:        emitFunc,
    103 		emitEnabled: true,
    104 	}
    105 	d.dynTab.table.init()
    106 	d.dynTab.allowedMaxSize = maxDynamicTableSize
    107 	d.dynTab.setMaxSize(maxDynamicTableSize)
    108 	return d
    109 }
    110 
    111 // ErrStringLength is returned by Decoder.Write when the max string length
    112 // (as configured by Decoder.SetMaxStringLength) would be violated.
    113 var ErrStringLength = errors.New("hpack: string too long")
    114 
    115 // SetMaxStringLength sets the maximum size of a HeaderField name or
    116 // value string. If a string exceeds this length (even after any
    117 // decompression), Write will return ErrStringLength.
    118 // A value of 0 means unlimited and is the default from NewDecoder.
    119 func (d *Decoder) SetMaxStringLength(n int) {
    120 	d.maxStrLen = n
    121 }
    122 
    123 // SetEmitFunc changes the callback used when new header fields
    124 // are decoded.
    125 // It must be non-nil. It does not affect EmitEnabled.
    126 func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) {
    127 	d.emit = emitFunc
    128 }
    129 
    130 // SetEmitEnabled controls whether the emitFunc provided to NewDecoder
    131 // should be called. The default is true.
    132 //
    133 // This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
    134 // while still decoding and keeping in-sync with decoder state, but
    135 // without doing unnecessary decompression or generating unnecessary
    136 // garbage for header fields past the limit.
    137 func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
    138 
    139 // EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
    140 // are currently enabled. The default is true.
    141 func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
    142 
    143 // TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
    144 // underlying buffers for garbage reasons.
    145 
    146 func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
    147 	d.dynTab.setMaxSize(v)
    148 }
    149 
    150 // SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
    151 // stream (via dynamic table size updates) may set the maximum size
    152 // to.
    153 func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
    154 	d.dynTab.allowedMaxSize = v
    155 }
    156 
    157 type dynamicTable struct {
    158 	// http://http2.github.io/http2-spec/compression.html#rfc.section.2.3.2
    159 	table          headerFieldTable
    160 	size           uint32 // in bytes
    161 	maxSize        uint32 // current maxSize
    162 	allowedMaxSize uint32 // maxSize may go up to this, inclusive
    163 }
    164 
    165 func (dt *dynamicTable) setMaxSize(v uint32) {
    166 	dt.maxSize = v
    167 	dt.evict()
    168 }
    169 
    170 func (dt *dynamicTable) add(f HeaderField) {
    171 	dt.table.addEntry(f)
    172 	dt.size += f.Size()
    173 	dt.evict()
    174 }
    175 
    176 // If we're too big, evict old stuff.
    177 func (dt *dynamicTable) evict() {
    178 	var n int
    179 	for dt.size > dt.maxSize && n < dt.table.len() {
    180 		dt.size -= dt.table.ents[n].Size()
    181 		n++
    182 	}
    183 	dt.table.evictOldest(n)
    184 }
    185 
    186 func (d *Decoder) maxTableIndex() int {
    187 	// This should never overflow. RFC 7540 Section 6.5.2 limits the size of
    188 	// the dynamic table to 2^32 bytes, where each entry will occupy more than
    189 	// one byte. Further, the staticTable has a fixed, small length.
    190 	return d.dynTab.table.len() + staticTable.len()
    191 }
    192 
    193 func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
    194 	// See Section 2.3.3.
    195 	if i == 0 {
    196 		return
    197 	}
    198 	if i <= uint64(staticTable.len()) {
    199 		return staticTable.ents[i-1], true
    200 	}
    201 	if i > uint64(d.maxTableIndex()) {
    202 		return
    203 	}
    204 	// In the dynamic table, newer entries have lower indices.
    205 	// However, dt.ents[0] is the oldest entry. Hence, dt.ents is
    206 	// the reversed dynamic table.
    207 	dt := d.dynTab.table
    208 	return dt.ents[dt.len()-(int(i)-staticTable.len())], true
    209 }
    210 
    211 // Decode decodes an entire block.
    212 //
    213 // TODO: remove this method and make it incremental later? This is
    214 // easier for debugging now.
    215 func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
    216 	var hf []HeaderField
    217 	saveFunc := d.emit
    218 	defer func() { d.emit = saveFunc }()
    219 	d.emit = func(f HeaderField) { hf = append(hf, f) }
    220 	if _, err := d.Write(p); err != nil {
    221 		return nil, err
    222 	}
    223 	if err := d.Close(); err != nil {
    224 		return nil, err
    225 	}
    226 	return hf, nil
    227 }
    228 
    229 func (d *Decoder) Close() error {
    230 	if d.saveBuf.Len() > 0 {
    231 		d.saveBuf.Reset()
    232 		return DecodingError{errors.New("truncated headers")}
    233 	}
    234 	return nil
    235 }
    236 
    237 func (d *Decoder) Write(p []byte) (n int, err error) {
    238 	if len(p) == 0 {
    239 		// Prevent state machine CPU attacks (making us redo
    240 		// work up to the point of finding out we don't have
    241 		// enough data)
    242 		return
    243 	}
    244 	// Only copy the data if we have to. Optimistically assume
    245 	// that p will contain a complete header block.
    246 	if d.saveBuf.Len() == 0 {
    247 		d.buf = p
    248 	} else {
    249 		d.saveBuf.Write(p)
    250 		d.buf = d.saveBuf.Bytes()
    251 		d.saveBuf.Reset()
    252 	}
    253 
    254 	for len(d.buf) > 0 {
    255 		err = d.parseHeaderFieldRepr()
    256 		if err == errNeedMore {
    257 			// Extra paranoia, making sure saveBuf won't
    258 			// get too large. All the varint and string
    259 			// reading code earlier should already catch
    260 			// overlong things and return ErrStringLength,
    261 			// but keep this as a last resort.
    262 			const varIntOverhead = 8 // conservative
    263 			if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
    264 				return 0, ErrStringLength
    265 			}
    266 			d.saveBuf.Write(d.buf)
    267 			return len(p), nil
    268 		}
    269 		if err != nil {
    270 			break
    271 		}
    272 	}
    273 	return len(p), err
    274 }
    275 
    276 // errNeedMore is an internal sentinel error value that means the
    277 // buffer is truncated and we need to read more data before we can
    278 // continue parsing.
    279 var errNeedMore = errors.New("need more data")
    280 
    281 type indexType int
    282 
    283 const (
    284 	indexedTrue indexType = iota
    285 	indexedFalse
    286 	indexedNever
    287 )
    288 
    289 func (v indexType) indexed() bool   { return v == indexedTrue }
    290 func (v indexType) sensitive() bool { return v == indexedNever }
    291 
    292 // returns errNeedMore if there isn't enough data available.
    293 // any other error is fatal.
    294 // consumes d.buf iff it returns nil.
    295 // precondition: must be called with len(d.buf) > 0
    296 func (d *Decoder) parseHeaderFieldRepr() error {
    297 	b := d.buf[0]
    298 	switch {
    299 	case b&128 != 0:
    300 		// Indexed representation.
    301 		// High bit set?
    302 		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.1
    303 		return d.parseFieldIndexed()
    304 	case b&192 == 64:
    305 		// 6.2.1 Literal Header Field with Incremental Indexing
    306 		// 0b10xxxxxx: top two bits are 10
    307 		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.1
    308 		return d.parseFieldLiteral(6, indexedTrue)
    309 	case b&240 == 0:
    310 		// 6.2.2 Literal Header Field without Indexing
    311 		// 0b0000xxxx: top four bits are 0000
    312 		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.2
    313 		return d.parseFieldLiteral(4, indexedFalse)
    314 	case b&240 == 16:
    315 		// 6.2.3 Literal Header Field never Indexed
    316 		// 0b0001xxxx: top four bits are 0001
    317 		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.3
    318 		return d.parseFieldLiteral(4, indexedNever)
    319 	case b&224 == 32:
    320 		// 6.3 Dynamic Table Size Update
    321 		// Top three bits are '001'.
    322 		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.3
    323 		return d.parseDynamicTableSizeUpdate()
    324 	}
    325 
    326 	return DecodingError{errors.New("invalid encoding")}
    327 }
    328 
    329 // (same invariants and behavior as parseHeaderFieldRepr)
    330 func (d *Decoder) parseFieldIndexed() error {
    331 	buf := d.buf
    332 	idx, buf, err := readVarInt(7, buf)
    333 	if err != nil {
    334 		return err
    335 	}
    336 	hf, ok := d.at(idx)
    337 	if !ok {
    338 		return DecodingError{InvalidIndexError(idx)}
    339 	}
    340 	d.buf = buf
    341 	return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
    342 }
    343 
    344 // (same invariants and behavior as parseHeaderFieldRepr)
    345 func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
    346 	buf := d.buf
    347 	nameIdx, buf, err := readVarInt(n, buf)
    348 	if err != nil {
    349 		return err
    350 	}
    351 
    352 	var hf HeaderField
    353 	wantStr := d.emitEnabled || it.indexed()
    354 	if nameIdx > 0 {
    355 		ihf, ok := d.at(nameIdx)
    356 		if !ok {
    357 			return DecodingError{InvalidIndexError(nameIdx)}
    358 		}
    359 		hf.Name = ihf.Name
    360 	} else {
    361 		hf.Name, buf, err = d.readString(buf, wantStr)
    362 		if err != nil {
    363 			return err
    364 		}
    365 	}
    366 	hf.Value, buf, err = d.readString(buf, wantStr)
    367 	if err != nil {
    368 		return err
    369 	}
    370 	d.buf = buf
    371 	if it.indexed() {
    372 		d.dynTab.add(hf)
    373 	}
    374 	hf.Sensitive = it.sensitive()
    375 	return d.callEmit(hf)
    376 }
    377 
    378 func (d *Decoder) callEmit(hf HeaderField) error {
    379 	if d.maxStrLen != 0 {
    380 		if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
    381 			return ErrStringLength
    382 		}
    383 	}
    384 	if d.emitEnabled {
    385 		d.emit(hf)
    386 	}
    387 	return nil
    388 }
    389 
    390 // (same invariants and behavior as parseHeaderFieldRepr)
    391 func (d *Decoder) parseDynamicTableSizeUpdate() error {
    392 	// RFC 7541, sec 4.2: This dynamic table size update MUST occur at the
    393 	// beginning of the first header block following the change to the dynamic table size.
    394 	if d.dynTab.size > 0 {
    395 		return DecodingError{errors.New("dynamic table size update MUST occur at the beginning of a header block")}
    396 	}
    397 
    398 	buf := d.buf
    399 	size, buf, err := readVarInt(5, buf)
    400 	if err != nil {
    401 		return err
    402 	}
    403 	if size > uint64(d.dynTab.allowedMaxSize) {
    404 		return DecodingError{errors.New("dynamic table size update too large")}
    405 	}
    406 	d.dynTab.setMaxSize(uint32(size))
    407 	d.buf = buf
    408 	return nil
    409 }
    410 
    411 var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
    412 
    413 // readVarInt reads an unsigned variable length integer off the
    414 // beginning of p. n is the parameter as described in
    415 // http://http2.github.io/http2-spec/compression.html#rfc.section.5.1.
    416 //
    417 // n must always be between 1 and 8.
    418 //
    419 // The returned remain buffer is either a smaller suffix of p, or err != nil.
    420 // The error is errNeedMore if p doesn't contain a complete integer.
    421 func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
    422 	if n < 1 || n > 8 {
    423 		panic("bad n")
    424 	}
    425 	if len(p) == 0 {
    426 		return 0, p, errNeedMore
    427 	}
    428 	i = uint64(p[0])
    429 	if n < 8 {
    430 		i &= (1 << uint64(n)) - 1
    431 	}
    432 	if i < (1<<uint64(n))-1 {
    433 		return i, p[1:], nil
    434 	}
    435 
    436 	origP := p
    437 	p = p[1:]
    438 	var m uint64
    439 	for len(p) > 0 {
    440 		b := p[0]
    441 		p = p[1:]
    442 		i += uint64(b&127) << m
    443 		if b&128 == 0 {
    444 			return i, p, nil
    445 		}
    446 		m += 7
    447 		if m >= 63 { // TODO: proper overflow check. making this up.
    448 			return 0, origP, errVarintOverflow
    449 		}
    450 	}
    451 	return 0, origP, errNeedMore
    452 }
    453 
    454 // readString decodes an hpack string from p.
    455 //
    456 // wantStr is whether s will be used. If false, decompression and
    457 // []byte->string garbage are skipped if s will be ignored
    458 // anyway. This does mean that huffman decoding errors for non-indexed
    459 // strings past the MAX_HEADER_LIST_SIZE are ignored, but the server
    460 // is returning an error anyway, and because they're not indexed, the error
    461 // won't affect the decoding state.
    462 func (d *Decoder) readString(p []byte, wantStr bool) (s string, remain []byte, err error) {
    463 	if len(p) == 0 {
    464 		return "", p, errNeedMore
    465 	}
    466 	isHuff := p[0]&128 != 0
    467 	strLen, p, err := readVarInt(7, p)
    468 	if err != nil {
    469 		return "", p, err
    470 	}
    471 	if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
    472 		return "", nil, ErrStringLength
    473 	}
    474 	if uint64(len(p)) < strLen {
    475 		return "", p, errNeedMore
    476 	}
    477 	if !isHuff {
    478 		if wantStr {
    479 			s = string(p[:strLen])
    480 		}
    481 		return s, p[strLen:], nil
    482 	}
    483 
    484 	if wantStr {
    485 		buf := bufPool.Get().(*bytes.Buffer)
    486 		buf.Reset() // don't trust others
    487 		defer bufPool.Put(buf)
    488 		if err := huffmanDecode(buf, d.maxStrLen, p[:strLen]); err != nil {
    489 			buf.Reset()
    490 			return "", nil, err
    491 		}
    492 		s = buf.String()
    493 		buf.Reset() // be nice to GC
    494 	}
    495 	return s, p[strLen:], nil
    496 }
    497