1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 // 5 6 /* 7 Package multipart implements MIME multipart parsing, as defined in RFC 8 2046. 9 10 The implementation is sufficient for HTTP (RFC 2388) and the multipart 11 bodies generated by popular browsers. 12 */ 13 package multipart 14 15 import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 "mime" 22 "mime/quotedprintable" 23 "net/textproto" 24 ) 25 26 var emptyParams = make(map[string]string) 27 28 // A Part represents a single part in a multipart body. 29 type Part struct { 30 // The headers of the body, if any, with the keys canonicalized 31 // in the same fashion that the Go http.Request headers are. 32 // For example, "foo-bar" changes case to "Foo-Bar" 33 // 34 // As a special case, if the "Content-Transfer-Encoding" header 35 // has a value of "quoted-printable", that header is instead 36 // hidden from this map and the body is transparently decoded 37 // during Read calls. 38 Header textproto.MIMEHeader 39 40 buffer *bytes.Buffer 41 mr *Reader 42 bytesRead int 43 44 disposition string 45 dispositionParams map[string]string 46 47 // r is either a reader directly reading from mr, or it's a 48 // wrapper around such a reader, decoding the 49 // Content-Transfer-Encoding 50 r io.Reader 51 } 52 53 // FormName returns the name parameter if p has a Content-Disposition 54 // of type "form-data". Otherwise it returns the empty string. 55 func (p *Part) FormName() string { 56 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 57 // of Content-Disposition value format. 58 if p.dispositionParams == nil { 59 p.parseContentDisposition() 60 } 61 if p.disposition != "form-data" { 62 return "" 63 } 64 return p.dispositionParams["name"] 65 } 66 67 // FileName returns the filename parameter of the Part's 68 // Content-Disposition header. 69 func (p *Part) FileName() string { 70 if p.dispositionParams == nil { 71 p.parseContentDisposition() 72 } 73 return p.dispositionParams["filename"] 74 } 75 76 func (p *Part) parseContentDisposition() { 77 v := p.Header.Get("Content-Disposition") 78 var err error 79 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 80 if err != nil { 81 p.dispositionParams = emptyParams 82 } 83 } 84 85 // NewReader creates a new multipart Reader reading from r using the 86 // given MIME boundary. 87 // 88 // The boundary is usually obtained from the "boundary" parameter of 89 // the message's "Content-Type" header. Use mime.ParseMediaType to 90 // parse such headers. 91 func NewReader(r io.Reader, boundary string) *Reader { 92 b := []byte("\r\n--" + boundary + "--") 93 return &Reader{ 94 bufReader: bufio.NewReader(r), 95 nl: b[:2], 96 nlDashBoundary: b[:len(b)-2], 97 dashBoundaryDash: b[2:], 98 dashBoundary: b[2 : len(b)-2], 99 } 100 } 101 102 func newPart(mr *Reader) (*Part, error) { 103 bp := &Part{ 104 Header: make(map[string][]string), 105 mr: mr, 106 buffer: new(bytes.Buffer), 107 } 108 if err := bp.populateHeaders(); err != nil { 109 return nil, err 110 } 111 bp.r = partReader{bp} 112 const cte = "Content-Transfer-Encoding" 113 if bp.Header.Get(cte) == "quoted-printable" { 114 bp.Header.Del(cte) 115 bp.r = quotedprintable.NewReader(bp.r) 116 } 117 return bp, nil 118 } 119 120 func (bp *Part) populateHeaders() error { 121 r := textproto.NewReader(bp.mr.bufReader) 122 header, err := r.ReadMIMEHeader() 123 if err == nil { 124 bp.Header = header 125 } 126 return err 127 } 128 129 // Read reads the body of a part, after its headers and before the 130 // next part (if any) begins. 131 func (p *Part) Read(d []byte) (n int, err error) { 132 return p.r.Read(d) 133 } 134 135 // partReader implements io.Reader by reading raw bytes directly from the 136 // wrapped *Part, without doing any Transfer-Encoding decoding. 137 type partReader struct { 138 p *Part 139 } 140 141 func (pr partReader) Read(d []byte) (n int, err error) { 142 p := pr.p 143 defer func() { 144 p.bytesRead += n 145 }() 146 if p.buffer.Len() >= len(d) { 147 // Internal buffer of unconsumed data is large enough for 148 // the read request. No need to parse more at the moment. 149 return p.buffer.Read(d) 150 } 151 peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor 152 153 // Look for an immediate empty part without a leading \r\n 154 // before the boundary separator. Some MIME code makes empty 155 // parts like this. Most browsers, however, write the \r\n 156 // before the subsequent boundary even for empty parts and 157 // won't hit this path. 158 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 159 return 0, io.EOF 160 } 161 unexpectedEOF := err == io.EOF 162 if err != nil && !unexpectedEOF { 163 return 0, fmt.Errorf("multipart: Part Read: %v", err) 164 } 165 if peek == nil { 166 panic("nil peek buf") 167 } 168 // Search the peek buffer for "\r\n--boundary". If found, 169 // consume everything up to the boundary. If not, consume only 170 // as much of the peek buffer as cannot hold the boundary 171 // string. 172 nCopy := 0 173 foundBoundary := false 174 if idx, isEnd := p.mr.peekBufferSeparatorIndex(peek); idx != -1 { 175 nCopy = idx 176 foundBoundary = isEnd 177 if !isEnd && nCopy == 0 { 178 nCopy = 1 // make some progress. 179 } 180 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 181 nCopy = safeCount 182 } else if unexpectedEOF { 183 // If we've run out of peek buffer and the boundary 184 // wasn't found (and can't possibly fit), we must have 185 // hit the end of the file unexpectedly. 186 return 0, io.ErrUnexpectedEOF 187 } 188 if nCopy > 0 { 189 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 190 return 0, err 191 } 192 } 193 n, err = p.buffer.Read(d) 194 if err == io.EOF && !foundBoundary { 195 // If the boundary hasn't been reached there's more to 196 // read, so don't pass through an EOF from the buffer 197 err = nil 198 } 199 return 200 } 201 202 func (p *Part) Close() error { 203 io.Copy(ioutil.Discard, p) 204 return nil 205 } 206 207 // Reader is an iterator over parts in a MIME multipart body. 208 // Reader's underlying parser consumes its input as needed. Seeking 209 // isn't supported. 210 type Reader struct { 211 bufReader *bufio.Reader 212 213 currentPart *Part 214 partsRead int 215 216 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 217 nlDashBoundary []byte // nl + "--boundary" 218 dashBoundaryDash []byte // "--boundary--" 219 dashBoundary []byte // "--boundary" 220 } 221 222 // NextPart returns the next part in the multipart or an error. 223 // When there are no more parts, the error io.EOF is returned. 224 func (r *Reader) NextPart() (*Part, error) { 225 if r.currentPart != nil { 226 r.currentPart.Close() 227 } 228 229 expectNewPart := false 230 for { 231 line, err := r.bufReader.ReadSlice('\n') 232 if err == io.EOF && r.isFinalBoundary(line) { 233 // If the buffer ends in "--boundary--" without the 234 // trailing "\r\n", ReadSlice will return an error 235 // (since it's missing the '\n'), but this is a valid 236 // multipart EOF so we need to return io.EOF instead of 237 // a fmt-wrapped one. 238 return nil, io.EOF 239 } 240 if err != nil { 241 return nil, fmt.Errorf("multipart: NextPart: %v", err) 242 } 243 244 if r.isBoundaryDelimiterLine(line) { 245 r.partsRead++ 246 bp, err := newPart(r) 247 if err != nil { 248 return nil, err 249 } 250 r.currentPart = bp 251 return bp, nil 252 } 253 254 if r.isFinalBoundary(line) { 255 // Expected EOF 256 return nil, io.EOF 257 } 258 259 if expectNewPart { 260 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 261 } 262 263 if r.partsRead == 0 { 264 // skip line 265 continue 266 } 267 268 // Consume the "\n" or "\r\n" separator between the 269 // body of the previous part and the boundary line we 270 // now expect will follow. (either a new part or the 271 // end boundary) 272 if bytes.Equal(line, r.nl) { 273 expectNewPart = true 274 continue 275 } 276 277 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 278 } 279 } 280 281 // isFinalBoundary reports whether line is the final boundary line 282 // indicating that all parts are over. 283 // It matches `^--boundary--[ \t]*(\r\n)?$` 284 func (mr *Reader) isFinalBoundary(line []byte) bool { 285 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 286 return false 287 } 288 rest := line[len(mr.dashBoundaryDash):] 289 rest = skipLWSPChar(rest) 290 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 291 } 292 293 func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 294 // http://tools.ietf.org/html/rfc2046#section-5.1 295 // The boundary delimiter line is then defined as a line 296 // consisting entirely of two hyphen characters ("-", 297 // decimal value 45) followed by the boundary parameter 298 // value from the Content-Type header field, optional linear 299 // whitespace, and a terminating CRLF. 300 if !bytes.HasPrefix(line, mr.dashBoundary) { 301 return false 302 } 303 rest := line[len(mr.dashBoundary):] 304 rest = skipLWSPChar(rest) 305 306 // On the first part, see our lines are ending in \n instead of \r\n 307 // and switch into that mode if so. This is a violation of the spec, 308 // but occurs in practice. 309 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 310 mr.nl = mr.nl[1:] 311 mr.nlDashBoundary = mr.nlDashBoundary[1:] 312 } 313 return bytes.Equal(rest, mr.nl) 314 } 315 316 // peekBufferIsEmptyPart reports whether the provided peek-ahead 317 // buffer represents an empty part. It is called only if we've not 318 // already read any bytes in this part and checks for the case of MIME 319 // software not writing the \r\n on empty parts. Some does, some 320 // doesn't. 321 // 322 // This checks that what follows the "--boundary" is actually the end 323 // ("--boundary--" with optional whitespace) or optional whitespace 324 // and then a newline, so we don't catch "--boundaryFAKE", in which 325 // case the whole line is part of the data. 326 func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 327 // End of parts case. 328 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 329 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 330 rest := peek[len(mr.dashBoundaryDash):] 331 rest = skipLWSPChar(rest) 332 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 333 } 334 if !bytes.HasPrefix(peek, mr.dashBoundary) { 335 return false 336 } 337 // Test whether rest matches `^[ \t]*\r\n`) 338 rest := peek[len(mr.dashBoundary):] 339 rest = skipLWSPChar(rest) 340 return bytes.HasPrefix(rest, mr.nl) 341 } 342 343 // peekBufferSeparatorIndex returns the index of mr.nlDashBoundary in 344 // peek and whether it is a real boundary (and not a prefix of an 345 // unrelated separator). To be the end, the peek buffer must contain a 346 // newline after the boundary. 347 func (mr *Reader) peekBufferSeparatorIndex(peek []byte) (idx int, isEnd bool) { 348 idx = bytes.Index(peek, mr.nlDashBoundary) 349 if idx == -1 { 350 return 351 } 352 peek = peek[idx+len(mr.nlDashBoundary):] 353 if len(peek) > 1 && peek[0] == '-' && peek[1] == '-' { 354 return idx, true 355 } 356 peek = skipLWSPChar(peek) 357 // Don't have a complete line after the peek. 358 if bytes.IndexByte(peek, '\n') == -1 { 359 return -1, false 360 } 361 if len(peek) > 0 && peek[0] == '\n' { 362 return idx, true 363 } 364 if len(peek) > 1 && peek[0] == '\r' && peek[1] == '\n' { 365 return idx, true 366 } 367 return idx, false 368 } 369 370 // skipLWSPChar returns b with leading spaces and tabs removed. 371 // RFC 822 defines: 372 // LWSP-char = SPACE / HTAB 373 func skipLWSPChar(b []byte) []byte { 374 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 375 b = b[1:] 376 } 377 return b 378 } 379