Home | History | Annotate | Download | only in textproto
      1 // Copyright 2010 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package textproto
      6 
      7 import (
      8 	"bufio"
      9 	"bytes"
     10 	"io"
     11 	"io/ioutil"
     12 	"strconv"
     13 	"strings"
     14 )
     15 
// A Reader implements convenience methods for reading requests
// or responses from a text protocol network connection.
type Reader struct {
	R   *bufio.Reader // buffered source that every method on Reader reads from
	dot *dotReader    // DotReader still in progress, if any; set by DotReader, drained by closeDot
	buf []byte        // a re-usable buffer for readContinuedLineSlice
}
     23 
     24 // NewReader returns a new Reader reading from r.
     25 //
     26 // To avoid denial of service attacks, the provided bufio.Reader
     27 // should be reading from an io.LimitReader or similar Reader to bound
     28 // the size of responses.
     29 func NewReader(r *bufio.Reader) *Reader {
     30 	return &Reader{R: r}
     31 }
     32 
     33 // ReadLine reads a single line from r,
     34 // eliding the final \n or \r\n from the returned string.
     35 func (r *Reader) ReadLine() (string, error) {
     36 	line, err := r.readLineSlice()
     37 	return string(line), err
     38 }
     39 
     40 // ReadLineBytes is like ReadLine but returns a []byte instead of a string.
     41 func (r *Reader) ReadLineBytes() ([]byte, error) {
     42 	line, err := r.readLineSlice()
     43 	if line != nil {
     44 		buf := make([]byte, len(line))
     45 		copy(buf, line)
     46 		line = buf
     47 	}
     48 	return line, err
     49 }
     50 
     51 func (r *Reader) readLineSlice() ([]byte, error) {
     52 	r.closeDot()
     53 	var line []byte
     54 	for {
     55 		l, more, err := r.R.ReadLine()
     56 		if err != nil {
     57 			return nil, err
     58 		}
     59 		// Avoid the copy if the first call produced a full line.
     60 		if line == nil && !more {
     61 			return l, nil
     62 		}
     63 		line = append(line, l...)
     64 		if !more {
     65 			break
     66 		}
     67 	}
     68 	return line, nil
     69 }
     70 
     71 // ReadContinuedLine reads a possibly continued line from r,
     72 // eliding the final trailing ASCII white space.
     73 // Lines after the first are considered continuations if they
     74 // begin with a space or tab character.  In the returned data,
     75 // continuation lines are separated from the previous line
     76 // only by a single space: the newline and leading white space
     77 // are removed.
     78 //
     79 // For example, consider this input:
     80 //
     81 //	Line 1
     82 //	  continued...
     83 //	Line 2
     84 //
     85 // The first call to ReadContinuedLine will return "Line 1 continued..."
     86 // and the second will return "Line 2".
     87 //
     88 // A line consisting of only white space is never continued.
     89 //
     90 func (r *Reader) ReadContinuedLine() (string, error) {
     91 	line, err := r.readContinuedLineSlice()
     92 	return string(line), err
     93 }
     94 
     95 // trim returns s with leading and trailing spaces and tabs removed.
     96 // It does not assume Unicode or UTF-8.
     97 func trim(s []byte) []byte {
     98 	i := 0
     99 	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
    100 		i++
    101 	}
    102 	n := len(s)
    103 	for n > i && (s[n-1] == ' ' || s[n-1] == '\t') {
    104 		n--
    105 	}
    106 	return s[i:n]
    107 }
    108 
    109 // ReadContinuedLineBytes is like ReadContinuedLine but
    110 // returns a []byte instead of a string.
    111 func (r *Reader) ReadContinuedLineBytes() ([]byte, error) {
    112 	line, err := r.readContinuedLineSlice()
    113 	if line != nil {
    114 		buf := make([]byte, len(line))
    115 		copy(buf, line)
    116 		line = buf
    117 	}
    118 	return line, err
    119 }
    120 
    121 func (r *Reader) readContinuedLineSlice() ([]byte, error) {
    122 	// Read the first line.
    123 	line, err := r.readLineSlice()
    124 	if err != nil {
    125 		return nil, err
    126 	}
    127 	if len(line) == 0 { // blank line - no continuation
    128 		return line, nil
    129 	}
    130 
    131 	// Optimistically assume that we have started to buffer the next line
    132 	// and it starts with an ASCII letter (the next header key), so we can
    133 	// avoid copying that buffered data around in memory and skipping over
    134 	// non-existent whitespace.
    135 	if r.R.Buffered() > 1 {
    136 		peek, err := r.R.Peek(1)
    137 		if err == nil && isASCIILetter(peek[0]) {
    138 			return trim(line), nil
    139 		}
    140 	}
    141 
    142 	// ReadByte or the next readLineSlice will flush the read buffer;
    143 	// copy the slice into buf.
    144 	r.buf = append(r.buf[:0], trim(line)...)
    145 
    146 	// Read continuation lines.
    147 	for r.skipSpace() > 0 {
    148 		line, err := r.readLineSlice()
    149 		if err != nil {
    150 			break
    151 		}
    152 		r.buf = append(r.buf, ' ')
    153 		r.buf = append(r.buf, line...)
    154 	}
    155 	return r.buf, nil
    156 }
    157 
    158 // skipSpace skips R over all spaces and returns the number of bytes skipped.
    159 func (r *Reader) skipSpace() int {
    160 	n := 0
    161 	for {
    162 		c, err := r.R.ReadByte()
    163 		if err != nil {
    164 			// Bufio will keep err until next read.
    165 			break
    166 		}
    167 		if c != ' ' && c != '\t' {
    168 			r.R.UnreadByte()
    169 			break
    170 		}
    171 		n++
    172 	}
    173 	return n
    174 }
    175 
    176 func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) {
    177 	line, err := r.ReadLine()
    178 	if err != nil {
    179 		return
    180 	}
    181 	return parseCodeLine(line, expectCode)
    182 }
    183 
    184 func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) {
    185 	if len(line) < 4 || line[3] != ' ' && line[3] != '-' {
    186 		err = ProtocolError("short response: " + line)
    187 		return
    188 	}
    189 	continued = line[3] == '-'
    190 	code, err = strconv.Atoi(line[0:3])
    191 	if err != nil || code < 100 {
    192 		err = ProtocolError("invalid response code: " + line)
    193 		return
    194 	}
    195 	message = line[4:]
    196 	if 1 <= expectCode && expectCode < 10 && code/100 != expectCode ||
    197 		10 <= expectCode && expectCode < 100 && code/10 != expectCode ||
    198 		100 <= expectCode && expectCode < 1000 && code != expectCode {
    199 		err = &Error{code, message}
    200 	}
    201 	return
    202 }
    203 
    204 // ReadCodeLine reads a response code line of the form
    205 //	code message
    206 // where code is a three-digit status code and the message
    207 // extends to the rest of the line.  An example of such a line is:
    208 //	220 plan9.bell-labs.com ESMTP
    209 //
    210 // If the prefix of the status does not match the digits in expectCode,
    211 // ReadCodeLine returns with err set to &Error{code, message}.
    212 // For example, if expectCode is 31, an error will be returned if
    213 // the status is not in the range [310,319].
    214 //
    215 // If the response is multi-line, ReadCodeLine returns an error.
    216 //
    217 // An expectCode <= 0 disables the check of the status code.
    218 //
    219 func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) {
    220 	code, continued, message, err := r.readCodeLine(expectCode)
    221 	if err == nil && continued {
    222 		err = ProtocolError("unexpected multi-line response: " + message)
    223 	}
    224 	return
    225 }
    226 
    227 // ReadResponse reads a multi-line response of the form:
    228 //
    229 //	code-message line 1
    230 //	code-message line 2
    231 //	...
    232 //	code message line n
    233 //
    234 // where code is a three-digit status code. The first line starts with the
    235 // code and a hyphen. The response is terminated by a line that starts
    236 // with the same code followed by a space. Each line in message is
    237 // separated by a newline (\n).
    238 //
    239 // See page 36 of RFC 959 (http://www.ietf.org/rfc/rfc959.txt) for
    240 // details.
    241 //
    242 // If the prefix of the status does not match the digits in expectCode,
    243 // ReadResponse returns with err set to &Error{code, message}.
    244 // For example, if expectCode is 31, an error will be returned if
    245 // the status is not in the range [310,319].
    246 //
    247 // An expectCode <= 0 disables the check of the status code.
    248 //
    249 func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) {
    250 	code, continued, message, err := r.readCodeLine(expectCode)
    251 	for err == nil && continued {
    252 		line, err := r.ReadLine()
    253 		if err != nil {
    254 			return 0, "", err
    255 		}
    256 
    257 		var code2 int
    258 		var moreMessage string
    259 		code2, continued, moreMessage, err = parseCodeLine(line, expectCode)
    260 		if err != nil || code2 != code {
    261 			message += "\n" + strings.TrimRight(line, "\r\n")
    262 			continued = true
    263 			continue
    264 		}
    265 		message += "\n" + moreMessage
    266 	}
    267 	return
    268 }
    269 
    270 // DotReader returns a new Reader that satisfies Reads using the
    271 // decoded text of a dot-encoded block read from r.
    272 // The returned Reader is only valid until the next call
    273 // to a method on r.
    274 //
    275 // Dot encoding is a common framing used for data blocks
    276 // in text protocols such as SMTP.  The data consists of a sequence
    277 // of lines, each of which ends in "\r\n".  The sequence itself
    278 // ends at a line containing just a dot: ".\r\n".  Lines beginning
    279 // with a dot are escaped with an additional dot to avoid
    280 // looking like the end of the sequence.
    281 //
    282 // The decoded form returned by the Reader's Read method
    283 // rewrites the "\r\n" line endings into the simpler "\n",
    284 // removes leading dot escapes if present, and stops with error io.EOF
    285 // after consuming (and discarding) the end-of-sequence line.
    286 func (r *Reader) DotReader() io.Reader {
    287 	r.closeDot()
    288 	r.dot = &dotReader{r: r}
    289 	return r.dot
    290 }
    291 
// A dotReader is the io.Reader handed out by Reader.DotReader.
// Its Read method decodes a dot-encoded block read from r.
type dotReader struct {
	r     *Reader // Reader whose bufio.Reader supplies the encoded bytes
	state int     // decoder state, one of the state constants declared in Read; zero means beginning of line
}
    296 
// Read satisfies reads by decoding dot-encoded data read from d.r.
//
// It stores decoded bytes in b and returns the number stored. Once the
// end marker line has been consumed it returns io.EOF. If the
// underlying reader reports io.EOF before the end marker is seen, Read
// returns io.ErrUnexpectedEOF instead. On any error, including the
// final io.EOF, d detaches itself from d.r if it is still the current
// dot reader.
func (d *dotReader) Read(b []byte) (n int, err error) {
	// Run data through a simple state machine to
	// elide leading dots, rewrite trailing \r\n into \n,
	// and detect ending .\r\n line.
	const (
		stateBeginLine = iota // beginning of line; initial state; must be zero
		stateDot              // read . at beginning of line
		stateDotCR            // read .\r at beginning of line
		stateCR               // read \r (possibly at end of line)
		stateData             // reading data in middle of line
		stateEOF              // reached .\r\n end marker line
	)
	br := d.r.R
	// Decode one input byte per iteration until b is full, the end
	// marker has been seen, or the source fails.
	for n < len(b) && d.state != stateEOF {
		var c byte
		c, err = br.ReadByte()
		if err != nil {
			// The input ended before the end marker line.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			break
		}
		// In the cases below, "continue" consumes c without emitting
		// anything, while leaving the switch emits c into b.
		switch d.state {
		case stateBeginLine:
			if c == '.' {
				d.state = stateDot
				continue
			}
			if c == '\r' {
				d.state = stateCR
				continue
			}
			d.state = stateData

		case stateDot:
			if c == '\r' {
				d.state = stateDotCR
				continue
			}
			if c == '\n' {
				// A bare ".\n" line is accepted as the end marker too.
				d.state = stateEOF
				continue
			}
			// The leading dot was an escape: drop it and emit c.
			d.state = stateData

		case stateDotCR:
			if c == '\n' {
				d.state = stateEOF
				continue
			}
			// Not part of .\r\n.
			// Consume leading dot and emit saved \r.
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateCR:
			if c == '\n' {
				d.state = stateBeginLine
				// This break leaves the switch, not the loop: the \n is
				// emitted below and the preceding \r is dropped.
				break
			}
			// Not part of \r\n.  Emit saved \r
			br.UnreadByte()
			c = '\r'
			d.state = stateData

		case stateData:
			if c == '\r' {
				d.state = stateCR
				continue
			}
			if c == '\n' {
				d.state = stateBeginLine
			}
		}
		b[n] = c
		n++
	}
	// Report io.EOF once the end marker has been consumed.
	if err == nil && d.state == stateEOF {
		err = io.EOF
	}
	// After any error this reader is finished; clearing d.r.dot is
	// what lets the loop in closeDot terminate.
	if err != nil && d.r.dot == d {
		d.r.dot = nil
	}
	return
}
    384 
    385 // closeDot drains the current DotReader if any,
    386 // making sure that it reads until the ending dot line.
    387 func (r *Reader) closeDot() {
    388 	if r.dot == nil {
    389 		return
    390 	}
    391 	buf := make([]byte, 128)
    392 	for r.dot != nil {
    393 		// When Read reaches EOF or an error,
    394 		// it will set r.dot == nil.
    395 		r.dot.Read(buf)
    396 	}
    397 }
    398 
    399 // ReadDotBytes reads a dot-encoding and returns the decoded data.
    400 //
    401 // See the documentation for the DotReader method for details about dot-encoding.
    402 func (r *Reader) ReadDotBytes() ([]byte, error) {
    403 	return ioutil.ReadAll(r.DotReader())
    404 }
    405 
    406 // ReadDotLines reads a dot-encoding and returns a slice
    407 // containing the decoded lines, with the final \r\n or \n elided from each.
    408 //
    409 // See the documentation for the DotReader method for details about dot-encoding.
    410 func (r *Reader) ReadDotLines() ([]string, error) {
    411 	// We could use ReadDotBytes and then Split it,
    412 	// but reading a line at a time avoids needing a
    413 	// large contiguous block of memory and is simpler.
    414 	var v []string
    415 	var err error
    416 	for {
    417 		var line string
    418 		line, err = r.ReadLine()
    419 		if err != nil {
    420 			if err == io.EOF {
    421 				err = io.ErrUnexpectedEOF
    422 			}
    423 			break
    424 		}
    425 
    426 		// Dot by itself marks end; otherwise cut one dot.
    427 		if len(line) > 0 && line[0] == '.' {
    428 			if len(line) == 1 {
    429 				break
    430 			}
    431 			line = line[1:]
    432 		}
    433 		v = append(v, line)
    434 	}
    435 	return v, err
    436 }
    437 
    438 // ReadMIMEHeader reads a MIME-style header from r.
    439 // The header is a sequence of possibly continued Key: Value lines
    440 // ending in a blank line.
    441 // The returned map m maps CanonicalMIMEHeaderKey(key) to a
    442 // sequence of values in the same order encountered in the input.
    443 //
    444 // For example, consider this input:
    445 //
    446 //	My-Key: Value 1
    447 //	Long-Key: Even
    448 //	       Longer Value
    449 //	My-Key: Value 2
    450 //
    451 // Given that input, ReadMIMEHeader returns the map:
    452 //
    453 //	map[string][]string{
    454 //		"My-Key": {"Value 1", "Value 2"},
    455 //		"Long-Key": {"Even Longer Value"},
    456 //	}
    457 //
    458 func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) {
    459 	// Avoid lots of small slice allocations later by allocating one
    460 	// large one ahead of time which we'll cut up into smaller
    461 	// slices. If this isn't big enough later, we allocate small ones.
    462 	var strs []string
    463 	hint := r.upcomingHeaderNewlines()
    464 	if hint > 0 {
    465 		strs = make([]string, hint)
    466 	}
    467 
    468 	m := make(MIMEHeader, hint)
    469 	for {
    470 		kv, err := r.readContinuedLineSlice()
    471 		if len(kv) == 0 {
    472 			return m, err
    473 		}
    474 
    475 		// Key ends at first colon; should not have spaces but
    476 		// they appear in the wild, violating specs, so we
    477 		// remove them if present.
    478 		i := bytes.IndexByte(kv, ':')
    479 		if i < 0 {
    480 			return m, ProtocolError("malformed MIME header line: " + string(kv))
    481 		}
    482 		endKey := i
    483 		for endKey > 0 && kv[endKey-1] == ' ' {
    484 			endKey--
    485 		}
    486 		key := canonicalMIMEHeaderKey(kv[:endKey])
    487 
    488 		// As per RFC 7230 field-name is a token, tokens consist of one or more chars.
    489 		// We could return a ProtocolError here, but better to be liberal in what we
    490 		// accept, so if we get an empty key, skip it.
    491 		if key == "" {
    492 			continue
    493 		}
    494 
    495 		// Skip initial spaces in value.
    496 		i++ // skip colon
    497 		for i < len(kv) && (kv[i] == ' ' || kv[i] == '\t') {
    498 			i++
    499 		}
    500 		value := string(kv[i:])
    501 
    502 		vv := m[key]
    503 		if vv == nil && len(strs) > 0 {
    504 			// More than likely this will be a single-element key.
    505 			// Most headers aren't multi-valued.
    506 			// Set the capacity on strs[0] to 1, so any future append
    507 			// won't extend the slice into the other strings.
    508 			vv, strs = strs[:1:1], strs[1:]
    509 			vv[0] = value
    510 			m[key] = vv
    511 		} else {
    512 			m[key] = append(vv, value)
    513 		}
    514 
    515 		if err != nil {
    516 			return m, err
    517 		}
    518 	}
    519 }
    520 
    521 // upcomingHeaderNewlines returns an approximation of the number of newlines
    522 // that will be in this header. If it gets confused, it returns 0.
    523 func (r *Reader) upcomingHeaderNewlines() (n int) {
    524 	// Try to determine the 'hint' size.
    525 	r.R.Peek(1) // force a buffer load if empty
    526 	s := r.R.Buffered()
    527 	if s == 0 {
    528 		return
    529 	}
    530 	peek, _ := r.R.Peek(s)
    531 	for len(peek) > 0 {
    532 		i := bytes.IndexByte(peek, '\n')
    533 		if i < 3 {
    534 			// Not present (-1) or found within the next few bytes,
    535 			// implying we're at the end ("\r\n\r\n" or "\n\n")
    536 			return
    537 		}
    538 		n++
    539 		peek = peek[i+1:]
    540 	}
    541 	return
    542 }
    543 
    544 // CanonicalMIMEHeaderKey returns the canonical format of the
    545 // MIME header key s.  The canonicalization converts the first
    546 // letter and any letter following a hyphen to upper case;
    547 // the rest are converted to lowercase.  For example, the
    548 // canonical key for "accept-encoding" is "Accept-Encoding".
    549 // MIME header keys are assumed to be ASCII only.
    550 // If s contains a space or invalid header field bytes, it is
    551 // returned without modifications.
    552 func CanonicalMIMEHeaderKey(s string) string {
    553 	// Quick check for canonical encoding.
    554 	upper := true
    555 	for i := 0; i < len(s); i++ {
    556 		c := s[i]
    557 		if !validHeaderFieldByte(c) {
    558 			return s
    559 		}
    560 		if upper && 'a' <= c && c <= 'z' {
    561 			return canonicalMIMEHeaderKey([]byte(s))
    562 		}
    563 		if !upper && 'A' <= c && c <= 'Z' {
    564 			return canonicalMIMEHeaderKey([]byte(s))
    565 		}
    566 		upper = c == '-'
    567 	}
    568 	return s
    569 }
    570 
// toLower is the distance between an ASCII lower-case letter and its
// upper-case form; canonicalMIMEHeaderKey adds or subtracts it to
// switch a letter's case.
const toLower = 'a' - 'A'
    572 
    573 // validHeaderFieldByte reports whether b is a valid byte in a header
    574 // field key. This is actually stricter than RFC 7230, which says:
    575 //   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
    576 //           "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
    577 //   token = 1*tchar
    578 // TODO: revisit in Go 1.6+ and possibly expand this. But note that many
    579 // servers have historically dropped '_' to prevent ambiguities when mapping
    580 // to CGI environment variables.
    581 func validHeaderFieldByte(b byte) bool {
    582 	return ('A' <= b && b <= 'Z') ||
    583 		('a' <= b && b <= 'z') ||
    584 		('0' <= b && b <= '9') ||
    585 		b == '-'
    586 }
    587 
    588 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
    589 // allowed to mutate the provided byte slice before returning the
    590 // string.
    591 //
    592 // For invalid inputs (if a contains spaces or non-token bytes), a
    593 // is unchanged and a string copy is returned.
    594 func canonicalMIMEHeaderKey(a []byte) string {
    595 	// See if a looks like a header key. If not, return it unchanged.
    596 	for _, c := range a {
    597 		if validHeaderFieldByte(c) {
    598 			continue
    599 		}
    600 		// Don't canonicalize.
    601 		return string(a)
    602 	}
    603 
    604 	upper := true
    605 	for i, c := range a {
    606 		// Canonicalize: first letter upper case
    607 		// and upper case after each dash.
    608 		// (Host, User-Agent, If-Modified-Since).
    609 		// MIME headers are ASCII only, so no Unicode issues.
    610 		if upper && 'a' <= c && c <= 'z' {
    611 			c -= toLower
    612 		} else if !upper && 'A' <= c && c <= 'Z' {
    613 			c += toLower
    614 		}
    615 		a[i] = c
    616 		upper = c == '-' // for next time
    617 	}
    618 	// The compiler recognizes m[string(byteSlice)] as a special
    619 	// case, so a copy of a's bytes into a new string does not
    620 	// happen in this map lookup:
    621 	if v := commonHeader[string(a)]; v != "" {
    622 		return v
    623 	}
    624 	return string(a)
    625 }
    626 
    627 // commonHeader interns common header strings.
    628 var commonHeader = make(map[string]string)
    629 
    630 func init() {
    631 	for _, v := range []string{
    632 		"Accept",
    633 		"Accept-Charset",
    634 		"Accept-Encoding",
    635 		"Accept-Language",
    636 		"Accept-Ranges",
    637 		"Cache-Control",
    638 		"Cc",
    639 		"Connection",
    640 		"Content-Id",
    641 		"Content-Language",
    642 		"Content-Length",
    643 		"Content-Transfer-Encoding",
    644 		"Content-Type",
    645 		"Cookie",
    646 		"Date",
    647 		"Dkim-Signature",
    648 		"Etag",
    649 		"Expires",
    650 		"From",
    651 		"Host",
    652 		"If-Modified-Since",
    653 		"If-None-Match",
    654 		"In-Reply-To",
    655 		"Last-Modified",
    656 		"Location",
    657 		"Message-Id",
    658 		"Mime-Version",
    659 		"Pragma",
    660 		"Received",
    661 		"Return-Path",
    662 		"Server",
    663 		"Set-Cookie",
    664 		"Subject",
    665 		"To",
    666 		"User-Agent",
    667 		"Via",
    668 		"X-Forwarded-For",
    669 		"X-Imforwards",
    670 		"X-Powered-By",
    671 	} {
    672 		commonHeader[v] = v
    673 	}
    674 }
    675