Home | History | Annotate | Download | only in internal
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // The wire protocol for HTTP's "chunked" Transfer-Encoding.
      6 
      7 // Package internal contains HTTP internals shared by net/http and
      8 // net/http/httputil.
      9 package internal
     10 
     11 import (
     12 	"bufio"
     13 	"bytes"
     14 	"errors"
     15 	"fmt"
     16 	"io"
     17 )
     18 
// maxLineLength is the length at or above which a chunk header line read by
// readChunkLine is rejected with ErrLineTooLong.
const maxLineLength = 4096 // assumed <= bufio.defaultBufSize

// ErrLineTooLong is returned by readChunkLine when a chunk header line is too
// long: either it reaches maxLineLength bytes, or the bufio.Reader reports
// bufio.ErrBufferFull before a newline is found.
var ErrLineTooLong = errors.New("header line too long")
     22 
     23 // NewChunkedReader returns a new chunkedReader that translates the data read from r
     24 // out of HTTP "chunked" format before returning it.
     25 // The chunkedReader returns io.EOF when the final 0-length chunk is read.
     26 //
     27 // NewChunkedReader is not needed by normal applications. The http package
     28 // automatically decodes chunking when reading response bodies.
     29 func NewChunkedReader(r io.Reader) io.Reader {
     30 	br, ok := r.(*bufio.Reader)
     31 	if !ok {
     32 		br = bufio.NewReader(r)
     33 	}
     34 	return &chunkedReader{r: br}
     35 }
     36 
// chunkedReader is the io.Reader returned by NewChunkedReader. It decodes
// the HTTP "chunked" transfer encoding read from r.
type chunkedReader struct {
	r        *bufio.Reader // buffered source of the chunk-encoded stream
	n        uint64        // unread bytes in chunk
	err      error         // once non-nil, Read stops decoding and returns it
	buf      [2]byte       // scratch space for reading the two-byte chunk footer
	checkEnd bool          // whether need to check for \r\n chunk footer
}
     44 
     45 func (cr *chunkedReader) beginChunk() {
     46 	// chunk-size CRLF
     47 	var line []byte
     48 	line, cr.err = readChunkLine(cr.r)
     49 	if cr.err != nil {
     50 		return
     51 	}
     52 	cr.n, cr.err = parseHexUint(line)
     53 	if cr.err != nil {
     54 		return
     55 	}
     56 	if cr.n == 0 {
     57 		cr.err = io.EOF
     58 	}
     59 }
     60 
     61 func (cr *chunkedReader) chunkHeaderAvailable() bool {
     62 	n := cr.r.Buffered()
     63 	if n > 0 {
     64 		peek, _ := cr.r.Peek(n)
     65 		return bytes.IndexByte(peek, '\n') >= 0
     66 	}
     67 	return false
     68 }
     69 
     70 func (cr *chunkedReader) Read(b []uint8) (n int, err error) {
     71 	for cr.err == nil {
     72 		if cr.checkEnd {
     73 			if n > 0 && cr.r.Buffered() < 2 {
     74 				// We have some data. Return early (per the io.Reader
     75 				// contract) instead of potentially blocking while
     76 				// reading more.
     77 				break
     78 			}
     79 			if _, cr.err = io.ReadFull(cr.r, cr.buf[:2]); cr.err == nil {
     80 				if string(cr.buf[:]) != "\r\n" {
     81 					cr.err = errors.New("malformed chunked encoding")
     82 					break
     83 				}
     84 			}
     85 			cr.checkEnd = false
     86 		}
     87 		if cr.n == 0 {
     88 			if n > 0 && !cr.chunkHeaderAvailable() {
     89 				// We've read enough. Don't potentially block
     90 				// reading a new chunk header.
     91 				break
     92 			}
     93 			cr.beginChunk()
     94 			continue
     95 		}
     96 		if len(b) == 0 {
     97 			break
     98 		}
     99 		rbuf := b
    100 		if uint64(len(rbuf)) > cr.n {
    101 			rbuf = rbuf[:cr.n]
    102 		}
    103 		var n0 int
    104 		n0, cr.err = cr.r.Read(rbuf)
    105 		n += n0
    106 		b = b[n0:]
    107 		cr.n -= uint64(n0)
    108 		// If we're at the end of a chunk, read the next two
    109 		// bytes to verify they are "\r\n".
    110 		if cr.n == 0 && cr.err == nil {
    111 			cr.checkEnd = true
    112 		}
    113 	}
    114 	return n, cr.err
    115 }
    116 
    117 // Read a line of bytes (up to \n) from b.
    118 // Give up if the line exceeds maxLineLength.
    119 // The returned bytes are owned by the bufio.Reader
    120 // so they are only valid until the next bufio read.
    121 func readChunkLine(b *bufio.Reader) ([]byte, error) {
    122 	p, err := b.ReadSlice('\n')
    123 	if err != nil {
    124 		// We always know when EOF is coming.
    125 		// If the caller asked for a line, there should be a line.
    126 		if err == io.EOF {
    127 			err = io.ErrUnexpectedEOF
    128 		} else if err == bufio.ErrBufferFull {
    129 			err = ErrLineTooLong
    130 		}
    131 		return nil, err
    132 	}
    133 	if len(p) >= maxLineLength {
    134 		return nil, ErrLineTooLong
    135 	}
    136 	p = trimTrailingWhitespace(p)
    137 	p, err = removeChunkExtension(p)
    138 	if err != nil {
    139 		return nil, err
    140 	}
    141 	return p, nil
    142 }
    143 
    144 func trimTrailingWhitespace(b []byte) []byte {
    145 	for len(b) > 0 && isASCIISpace(b[len(b)-1]) {
    146 		b = b[:len(b)-1]
    147 	}
    148 	return b
    149 }
    150 
    151 func isASCIISpace(b byte) bool {
    152 	return b == ' ' || b == '\t' || b == '\n' || b == '\r'
    153 }
    154 
    155 // removeChunkExtension removes any chunk-extension from p.
    156 // For example,
    157 //     "0" => "0"
    158 //     "0;token" => "0"
    159 //     "0;token=val" => "0"
    160 //     `0;token="quoted string"` => "0"
    161 func removeChunkExtension(p []byte) ([]byte, error) {
    162 	semi := bytes.IndexByte(p, ';')
    163 	if semi == -1 {
    164 		return p, nil
    165 	}
    166 	// TODO: care about exact syntax of chunk extensions? We're
    167 	// ignoring and stripping them anyway. For now just never
    168 	// return an error.
    169 	return p[:semi], nil
    170 }
    171 
    172 // NewChunkedWriter returns a new chunkedWriter that translates writes into HTTP
    173 // "chunked" format before writing them to w. Closing the returned chunkedWriter
    174 // sends the final 0-length chunk that marks the end of the stream.
    175 //
    176 // NewChunkedWriter is not needed by normal applications. The http
    177 // package adds chunking automatically if handlers don't set a
    178 // Content-Length header. Using newChunkedWriter inside a handler
    179 // would result in double chunking or chunking with a Content-Length
    180 // length, both of which are wrong.
    181 func NewChunkedWriter(w io.Writer) io.WriteCloser {
    182 	return &chunkedWriter{w}
    183 }
    184 
// chunkedWriter is the io.WriteCloser returned by NewChunkedWriter. Data
// written to it is emitted on Wire in HTTP chunked Transfer-Encoding wire
// format.
type chunkedWriter struct {
	Wire io.Writer // underlying writer that receives the encoded chunks
}
    190 
    191 // Write the contents of data as one chunk to Wire.
    192 // NOTE: Note that the corresponding chunk-writing procedure in Conn.Write has
    193 // a bug since it does not check for success of io.WriteString
    194 func (cw *chunkedWriter) Write(data []byte) (n int, err error) {
    195 
    196 	// Don't send 0-length data. It looks like EOF for chunked encoding.
    197 	if len(data) == 0 {
    198 		return 0, nil
    199 	}
    200 
    201 	if _, err = fmt.Fprintf(cw.Wire, "%x\r\n", len(data)); err != nil {
    202 		return 0, err
    203 	}
    204 	if n, err = cw.Wire.Write(data); err != nil {
    205 		return
    206 	}
    207 	if n != len(data) {
    208 		err = io.ErrShortWrite
    209 		return
    210 	}
    211 	if _, err = io.WriteString(cw.Wire, "\r\n"); err != nil {
    212 		return
    213 	}
    214 	if bw, ok := cw.Wire.(*FlushAfterChunkWriter); ok {
    215 		err = bw.Flush()
    216 	}
    217 	return
    218 }
    219 
    220 func (cw *chunkedWriter) Close() error {
    221 	_, err := io.WriteString(cw.Wire, "0\r\n")
    222 	return err
    223 }
    224 
// FlushAfterChunkWriter signals from the caller of NewChunkedWriter
// that each chunk should be followed by a flush. It is used by the
// http.Transport code to keep the buffering behavior for headers and
// trailers, but flush out chunks aggressively in the middle for
// request bodies which may be generated slowly. See Issue 6574.
//
// chunkedWriter.Write detects this type with a type assertion on its
// destination writer and calls Flush after each chunk it writes.
type FlushAfterChunkWriter struct {
	*bufio.Writer
}
    233 
    234 func parseHexUint(v []byte) (n uint64, err error) {
    235 	for i, b := range v {
    236 		switch {
    237 		case '0' <= b && b <= '9':
    238 			b = b - '0'
    239 		case 'a' <= b && b <= 'f':
    240 			b = b - 'a' + 10
    241 		case 'A' <= b && b <= 'F':
    242 			b = b - 'A' + 10
    243 		default:
    244 			return 0, errors.New("invalid byte in chunk length")
    245 		}
    246 		if i == 16 {
    247 			return 0, errors.New("http chunk length too large")
    248 		}
    249 		n <<= 4
    250 		n |= uint64(b)
    251 	}
    252 	return
    253 }
    254