Home | History | Annotate | Download | only in multipart
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 //
      5 
      6 /*
      7 Package multipart implements MIME multipart parsing, as defined in RFC
      8 2046.
      9 
     10 The implementation is sufficient for HTTP (RFC 2388) and the multipart
     11 bodies generated by popular browsers.
     12 */
     13 package multipart
     14 
     15 import (
     16 	"bufio"
     17 	"bytes"
     18 	"fmt"
     19 	"io"
     20 	"io/ioutil"
     21 	"mime"
     22 	"mime/quotedprintable"
     23 	"net/textproto"
     24 )
     25 
     26 var emptyParams = make(map[string]string)
     27 
     28 // This constant needs to be at least 76 for this package to work correctly.
     29 // This is because \r\n--separator_of_len_70- would fill the buffer and it
     30 // wouldn't be safe to consume a single byte from it.
     31 const peekBufferSize = 4096
     32 
// A Part represents a single part in a multipart body.
type Part struct {
	// The headers of the body, if any, with the keys canonicalized
	// in the same fashion that the Go http.Request headers are.
	// For example, "foo-bar" changes case to "Foo-Bar"
	//
	// As a special case, if the "Content-Transfer-Encoding" header
	// has a value of "quoted-printable", that header is instead
	// hidden from this map and the body is transparently decoded
	// during Read calls.
	Header textproto.MIMEHeader

	// mr is the Reader that produced this part; the part's header and
	// body bytes are pulled from mr.bufReader.
	mr *Reader

	// disposition and dispositionParams hold the parsed
	// Content-Disposition header. They are filled in on first use by
	// parseContentDisposition; dispositionParams is nil until then.
	disposition       string
	dispositionParams map[string]string

	// r is either a reader directly reading from mr, or it's a
	// wrapper around such a reader, decoding the
	// Content-Transfer-Encoding
	r io.Reader

	n       int   // known data bytes waiting in mr.bufReader
	total   int64 // total data bytes read already
	err     error // error to return when n == 0
	readErr error // read error observed from mr.bufReader
}
     60 
     61 // FormName returns the name parameter if p has a Content-Disposition
     62 // of type "form-data".  Otherwise it returns the empty string.
     63 func (p *Part) FormName() string {
     64 	// See http://tools.ietf.org/html/rfc2183 section 2 for EBNF
     65 	// of Content-Disposition value format.
     66 	if p.dispositionParams == nil {
     67 		p.parseContentDisposition()
     68 	}
     69 	if p.disposition != "form-data" {
     70 		return ""
     71 	}
     72 	return p.dispositionParams["name"]
     73 }
     74 
     75 // FileName returns the filename parameter of the Part's
     76 // Content-Disposition header.
     77 func (p *Part) FileName() string {
     78 	if p.dispositionParams == nil {
     79 		p.parseContentDisposition()
     80 	}
     81 	return p.dispositionParams["filename"]
     82 }
     83 
     84 func (p *Part) parseContentDisposition() {
     85 	v := p.Header.Get("Content-Disposition")
     86 	var err error
     87 	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
     88 	if err != nil {
     89 		p.dispositionParams = emptyParams
     90 	}
     91 }
     92 
     93 // NewReader creates a new multipart Reader reading from r using the
     94 // given MIME boundary.
     95 //
     96 // The boundary is usually obtained from the "boundary" parameter of
     97 // the message's "Content-Type" header. Use mime.ParseMediaType to
     98 // parse such headers.
     99 func NewReader(r io.Reader, boundary string) *Reader {
    100 	b := []byte("\r\n--" + boundary + "--")
    101 	return &Reader{
    102 		bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize),
    103 		nl:               b[:2],
    104 		nlDashBoundary:   b[:len(b)-2],
    105 		dashBoundaryDash: b[2:],
    106 		dashBoundary:     b[2 : len(b)-2],
    107 	}
    108 }
    109 
    110 // stickyErrorReader is an io.Reader which never calls Read on its
    111 // underlying Reader once an error has been seen. (the io.Reader
    112 // interface's contract promises nothing about the return values of
    113 // Read calls after an error, yet this package does do multiple Reads
    114 // after error)
    115 type stickyErrorReader struct {
    116 	r   io.Reader
    117 	err error
    118 }
    119 
    120 func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
    121 	if r.err != nil {
    122 		return 0, r.err
    123 	}
    124 	n, r.err = r.r.Read(p)
    125 	return n, r.err
    126 }
    127 
    128 func newPart(mr *Reader) (*Part, error) {
    129 	bp := &Part{
    130 		Header: make(map[string][]string),
    131 		mr:     mr,
    132 	}
    133 	if err := bp.populateHeaders(); err != nil {
    134 		return nil, err
    135 	}
    136 	bp.r = partReader{bp}
    137 	const cte = "Content-Transfer-Encoding"
    138 	if bp.Header.Get(cte) == "quoted-printable" {
    139 		bp.Header.Del(cte)
    140 		bp.r = quotedprintable.NewReader(bp.r)
    141 	}
    142 	return bp, nil
    143 }
    144 
    145 func (bp *Part) populateHeaders() error {
    146 	r := textproto.NewReader(bp.mr.bufReader)
    147 	header, err := r.ReadMIMEHeader()
    148 	if err == nil {
    149 		bp.Header = header
    150 	}
    151 	return err
    152 }
    153 
    154 // Read reads the body of a part, after its headers and before the
    155 // next part (if any) begins.
    156 func (p *Part) Read(d []byte) (n int, err error) {
    157 	return p.r.Read(d)
    158 }
    159 
    160 // partReader implements io.Reader by reading raw bytes directly from the
    161 // wrapped *Part, without doing any Transfer-Encoding decoding.
    162 type partReader struct {
    163 	p *Part
    164 }
    165 
    166 func (pr partReader) Read(d []byte) (int, error) {
    167 	p := pr.p
    168 	br := p.mr.bufReader
    169 
    170 	// Read into buffer until we identify some data to return,
    171 	// or we find a reason to stop (boundary or read error).
    172 	for p.n == 0 && p.err == nil {
    173 		peek, _ := br.Peek(br.Buffered())
    174 		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
    175 		if p.n == 0 && p.err == nil {
    176 			// Force buffered I/O to read more into buffer.
    177 			_, p.readErr = br.Peek(len(peek) + 1)
    178 			if p.readErr == io.EOF {
    179 				p.readErr = io.ErrUnexpectedEOF
    180 			}
    181 		}
    182 	}
    183 
    184 	// Read out from "data to return" part of buffer.
    185 	if p.n == 0 {
    186 		return 0, p.err
    187 	}
    188 	n := len(d)
    189 	if n > p.n {
    190 		n = p.n
    191 	}
    192 	n, _ = br.Read(d[:n])
    193 	p.total += int64(n)
    194 	p.n -= n
    195 	if p.n == 0 {
    196 		return n, p.err
    197 	}
    198 	return n, nil
    199 }
    200 
    201 // scanUntilBoundary scans buf to identify how much of it can be safely
    202 // returned as part of the Part body.
    203 // dashBoundary is "--boundary".
    204 // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
    205 // The comments below (and the name) assume "\n--boundary", but either is accepted.
    206 // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
    207 // readErr is the read error, if any, that followed reading the bytes in buf.
    208 // scanUntilBoundary returns the number of data bytes from buf that can be
    209 // returned as part of the Part body and also the error to return (if any)
    210 // once those data bytes are done.
    211 func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
    212 	if total == 0 {
    213 		// At beginning of body, allow dashBoundary.
    214 		if bytes.HasPrefix(buf, dashBoundary) {
    215 			switch matchAfterPrefix(buf, dashBoundary, readErr) {
    216 			case -1:
    217 				return len(dashBoundary), nil
    218 			case 0:
    219 				return 0, nil
    220 			case +1:
    221 				return 0, io.EOF
    222 			}
    223 		}
    224 		if bytes.HasPrefix(dashBoundary, buf) {
    225 			return 0, readErr
    226 		}
    227 	}
    228 
    229 	// Search for "\n--boundary".
    230 	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
    231 		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
    232 		case -1:
    233 			return i + len(nlDashBoundary), nil
    234 		case 0:
    235 			return i, nil
    236 		case +1:
    237 			return i, io.EOF
    238 		}
    239 	}
    240 	if bytes.HasPrefix(nlDashBoundary, buf) {
    241 		return 0, readErr
    242 	}
    243 
    244 	// Otherwise, anything up to the final \n is not part of the boundary
    245 	// and so must be part of the body.
    246 	// Also if the section from the final \n onward is not a prefix of the boundary,
    247 	// it too must be part of the body.
    248 	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
    249 	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
    250 		return i, nil
    251 	}
    252 	return len(buf), readErr
    253 }
    254 
    255 // matchAfterPrefix checks whether buf should be considered to match the boundary.
    256 // The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
    257 // and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
    258 //
    259 // matchAfterPrefix returns +1 if the buffer does match the boundary,
    260 // meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input.
    261 // It returns -1 if the buffer definitely does NOT match the boundary,
    262 // meaning the prefix is followed by some other character.
    263 // For example, "--foobar" does not match "--foo".
    264 // It returns 0 more input needs to be read to make the decision,
    265 // meaning that len(buf) == len(prefix) and readErr == nil.
    266 func matchAfterPrefix(buf, prefix []byte, readErr error) int {
    267 	if len(buf) == len(prefix) {
    268 		if readErr != nil {
    269 			return +1
    270 		}
    271 		return 0
    272 	}
    273 	c := buf[len(prefix)]
    274 	if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' {
    275 		return +1
    276 	}
    277 	return -1
    278 }
    279 
    280 func (p *Part) Close() error {
    281 	io.Copy(ioutil.Discard, p)
    282 	return nil
    283 }
    284 
// Reader is an iterator over parts in a MIME multipart body.
// Reader's underlying parser consumes its input as needed. Seeking
// isn't supported.
type Reader struct {
	// bufReader buffers the input; both boundary lines and part bodies
	// are read through it.
	bufReader *bufio.Reader

	// currentPart is the Part most recently returned by NextPart; it is
	// closed on the following NextPart call.
	currentPart *Part
	// partsRead counts the boundary delimiter lines seen by NextPart.
	partsRead int

	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
	nlDashBoundary   []byte // nl + "--boundary"
	dashBoundaryDash []byte // "--boundary--"
	dashBoundary     []byte // "--boundary"
}
    299 
    300 // NextPart returns the next part in the multipart or an error.
    301 // When there are no more parts, the error io.EOF is returned.
    302 func (r *Reader) NextPart() (*Part, error) {
    303 	if r.currentPart != nil {
    304 		r.currentPart.Close()
    305 	}
    306 
    307 	expectNewPart := false
    308 	for {
    309 		line, err := r.bufReader.ReadSlice('\n')
    310 
    311 		if err == io.EOF && r.isFinalBoundary(line) {
    312 			// If the buffer ends in "--boundary--" without the
    313 			// trailing "\r\n", ReadSlice will return an error
    314 			// (since it's missing the '\n'), but this is a valid
    315 			// multipart EOF so we need to return io.EOF instead of
    316 			// a fmt-wrapped one.
    317 			return nil, io.EOF
    318 		}
    319 		if err != nil {
    320 			return nil, fmt.Errorf("multipart: NextPart: %v", err)
    321 		}
    322 
    323 		if r.isBoundaryDelimiterLine(line) {
    324 			r.partsRead++
    325 			bp, err := newPart(r)
    326 			if err != nil {
    327 				return nil, err
    328 			}
    329 			r.currentPart = bp
    330 			return bp, nil
    331 		}
    332 
    333 		if r.isFinalBoundary(line) {
    334 			// Expected EOF
    335 			return nil, io.EOF
    336 		}
    337 
    338 		if expectNewPart {
    339 			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
    340 		}
    341 
    342 		if r.partsRead == 0 {
    343 			// skip line
    344 			continue
    345 		}
    346 
    347 		// Consume the "\n" or "\r\n" separator between the
    348 		// body of the previous part and the boundary line we
    349 		// now expect will follow. (either a new part or the
    350 		// end boundary)
    351 		if bytes.Equal(line, r.nl) {
    352 			expectNewPart = true
    353 			continue
    354 		}
    355 
    356 		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
    357 	}
    358 }
    359 
    360 // isFinalBoundary reports whether line is the final boundary line
    361 // indicating that all parts are over.
    362 // It matches `^--boundary--[ \t]*(\r\n)?$`
    363 func (mr *Reader) isFinalBoundary(line []byte) bool {
    364 	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
    365 		return false
    366 	}
    367 	rest := line[len(mr.dashBoundaryDash):]
    368 	rest = skipLWSPChar(rest)
    369 	return len(rest) == 0 || bytes.Equal(rest, mr.nl)
    370 }
    371 
    372 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
    373 	// http://tools.ietf.org/html/rfc2046#section-5.1
    374 	//   The boundary delimiter line is then defined as a line
    375 	//   consisting entirely of two hyphen characters ("-",
    376 	//   decimal value 45) followed by the boundary parameter
    377 	//   value from the Content-Type header field, optional linear
    378 	//   whitespace, and a terminating CRLF.
    379 	if !bytes.HasPrefix(line, mr.dashBoundary) {
    380 		return false
    381 	}
    382 	rest := line[len(mr.dashBoundary):]
    383 	rest = skipLWSPChar(rest)
    384 
    385 	// On the first part, see our lines are ending in \n instead of \r\n
    386 	// and switch into that mode if so. This is a violation of the spec,
    387 	// but occurs in practice.
    388 	if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
    389 		mr.nl = mr.nl[1:]
    390 		mr.nlDashBoundary = mr.nlDashBoundary[1:]
    391 	}
    392 	return bytes.Equal(rest, mr.nl)
    393 }
    394 
    395 // skipLWSPChar returns b with leading spaces and tabs removed.
    396 // RFC 822 defines:
    397 //    LWSP-char = SPACE / HTAB
    398 func skipLWSPChar(b []byte) []byte {
    399 	for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
    400 		b = b[1:]
    401 	}
    402 	return b
    403 }
    404