Home | History | Annotate | Download | only in mime
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package mime
      6 
      7 import (
      8 	"bytes"
      9 	"encoding/base64"
     10 	"errors"
     11 	"fmt"
     12 	"io"
     13 	"strings"
     14 	"sync"
     15 	"unicode"
     16 	"unicode/utf8"
     17 )
     18 
// A WordEncoder is an RFC 2047 encoded-word encoder.
// Its underlying byte is the encoding letter that encodeWord writes between
// the charset and the encoded text of each encoded-word.
type WordEncoder byte

const (
	// BEncoding represents Base64 encoding scheme as defined by RFC 2045.
	BEncoding = WordEncoder('b')
	// QEncoding represents the Q-encoding scheme as defined by RFC 2047.
	QEncoding = WordEncoder('q')
)
     28 
     29 var (
     30 	errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
     31 )
     32 
     33 // Encode returns the encoded-word form of s. If s is ASCII without special
     34 // characters, it is returned unchanged. The provided charset is the IANA
     35 // charset name of s. It is case insensitive.
     36 func (e WordEncoder) Encode(charset, s string) string {
     37 	if !needsEncoding(s) {
     38 		return s
     39 	}
     40 	return e.encodeWord(charset, s)
     41 }
     42 
     43 func needsEncoding(s string) bool {
     44 	for _, b := range s {
     45 		if (b < ' ' || b > '~') && b != '\t' {
     46 			return true
     47 		}
     48 	}
     49 	return false
     50 }
     51 
     52 // encodeWord encodes a string into an encoded-word.
     53 func (e WordEncoder) encodeWord(charset, s string) string {
     54 	buf := getBuffer()
     55 	defer putBuffer(buf)
     56 
     57 	buf.WriteString("=?")
     58 	buf.WriteString(charset)
     59 	buf.WriteByte('?')
     60 	buf.WriteByte(byte(e))
     61 	buf.WriteByte('?')
     62 
     63 	if e == BEncoding {
     64 		w := base64.NewEncoder(base64.StdEncoding, buf)
     65 		io.WriteString(w, s)
     66 		w.Close()
     67 	} else {
     68 		enc := make([]byte, 3)
     69 		for i := 0; i < len(s); i++ {
     70 			b := s[i]
     71 			switch {
     72 			case b == ' ':
     73 				buf.WriteByte('_')
     74 			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
     75 				buf.WriteByte(b)
     76 			default:
     77 				enc[0] = '='
     78 				enc[1] = upperhex[b>>4]
     79 				enc[2] = upperhex[b&0x0f]
     80 				buf.Write(enc)
     81 			}
     82 		}
     83 	}
     84 	buf.WriteString("?=")
     85 	return buf.String()
     86 }
     87 
// upperhex maps a 4-bit value to its upper-case hexadecimal digit. The
// Q-encoder indexes it with the high and low nibble of a byte to build the
// "=XX" escape.
const upperhex = "0123456789ABCDEF"
     89 
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
    100 
    101 // Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
    102 // word is returned unchanged.
    103 func (d *WordDecoder) Decode(word string) (string, error) {
    104 	fields := strings.Split(word, "?") // TODO: remove allocation?
    105 	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
    106 		return "", errInvalidWord
    107 	}
    108 
    109 	content, err := decode(fields[2][0], fields[3])
    110 	if err != nil {
    111 		return "", err
    112 	}
    113 
    114 	buf := getBuffer()
    115 	defer putBuffer(buf)
    116 
    117 	if err := d.convert(buf, fields[1], content); err != nil {
    118 		return "", err
    119 	}
    120 
    121 	return buf.String(), nil
    122 }
    123 
// DecodeHeader decodes all encoded-words of the given string. Text that does
// not form a decodable encoded-word is copied to the result unchanged.
//
// It returns an error only when the charset conversion of a decoded word
// fails: the charset is not one handled by default and CharsetReader of d is
// nil, CharsetReader returns an error, or reading from the reader it returned
// fails.
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
	// If there is no encoded-word, returns before creating a buffer.
	i := strings.Index(header, "=?")
	if i == -1 {
		return header, nil
	}

	buf := getBuffer()
	defer putBuffer(buf)

	// Everything before the first "=?" is plain text.
	buf.WriteString(header[:i])
	header = header[i:]

	// betweenWords is true when the previous iteration emitted a decoded
	// word, i.e. header currently starts right after an encoded-word.
	betweenWords := false
	for {
		// Each iteration parses "=?charset?encoding?text?=" by hand. Any
		// structural mismatch breaks out of the loop, and what is left of
		// header is appended verbatim after the loop.
		start := strings.Index(header, "=?")
		if start == -1 {
			break
		}
		cur := start + len("=?")

		i := strings.Index(header[cur:], "?")
		if i == -1 {
			break
		}
		charset := header[cur : cur+i]
		cur += i + len("?")

		// There must be room for the encoding byte, the '?' after it and
		// the closing "?=".
		if len(header) < cur+len("Q??=") {
			break
		}
		encoding := header[cur]
		cur++

		if header[cur] != '?' {
			break
		}
		cur++

		j := strings.Index(header[cur:], "?=")
		if j == -1 {
			break
		}
		text := header[cur : cur+j]
		end := cur + j + len("?=")

		content, err := decode(encoding, text)
		if err != nil {
			// Not a decodable word: emit everything up to and including this
			// "=?" as is and resume the search right after it.
			betweenWords = false
			buf.WriteString(header[:start+2])
			header = header[start+2:]
			continue
		}

		// Write characters before the encoded-word. White-space and newline
		// characters separating two encoded-words must be deleted.
		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
			buf.WriteString(header[:start])
		}

		if err := d.convert(buf, charset, content); err != nil {
			return "", err
		}

		header = header[end:]
		betweenWords = true
	}

	// Append whatever follows the last decoded word.
	if len(header) > 0 {
		buf.WriteString(header)
	}

	return buf.String(), nil
}
    200 
    201 func decode(encoding byte, text string) ([]byte, error) {
    202 	switch encoding {
    203 	case 'B', 'b':
    204 		return base64.StdEncoding.DecodeString(text)
    205 	case 'Q', 'q':
    206 		return qDecode(text)
    207 	default:
    208 		return nil, errInvalidWord
    209 	}
    210 }
    211 
    212 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
    213 	switch {
    214 	case strings.EqualFold("utf-8", charset):
    215 		buf.Write(content)
    216 	case strings.EqualFold("iso-8859-1", charset):
    217 		for _, c := range content {
    218 			buf.WriteRune(rune(c))
    219 		}
    220 	case strings.EqualFold("us-ascii", charset):
    221 		for _, c := range content {
    222 			if c >= utf8.RuneSelf {
    223 				buf.WriteRune(unicode.ReplacementChar)
    224 			} else {
    225 				buf.WriteByte(c)
    226 			}
    227 		}
    228 	default:
    229 		if d.CharsetReader == nil {
    230 			return fmt.Errorf("mime: unhandled charset %q", charset)
    231 		}
    232 		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
    233 		if err != nil {
    234 			return err
    235 		}
    236 		if _, err = buf.ReadFrom(r); err != nil {
    237 			return err
    238 		}
    239 	}
    240 	return nil
    241 }
    242 
    243 // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
    244 // one byte of non-whitespace.
    245 func hasNonWhitespace(s string) bool {
    246 	for _, b := range s {
    247 		switch b {
    248 		// Encoded-words can only be separated by linear white spaces which does
    249 		// not include vertical tabs (\v).
    250 		case ' ', '\t', '\n', '\r':
    251 		default:
    252 			return true
    253 		}
    254 	}
    255 	return false
    256 }
    257 
    258 // qDecode decodes a Q encoded string.
    259 func qDecode(s string) ([]byte, error) {
    260 	dec := make([]byte, len(s))
    261 	n := 0
    262 	for i := 0; i < len(s); i++ {
    263 		switch c := s[i]; {
    264 		case c == '_':
    265 			dec[n] = ' '
    266 		case c == '=':
    267 			if i+2 >= len(s) {
    268 				return nil, errInvalidWord
    269 			}
    270 			b, err := readHexByte(s[i+1], s[i+2])
    271 			if err != nil {
    272 				return nil, err
    273 			}
    274 			dec[n] = b
    275 			i += 2
    276 		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
    277 			dec[n] = c
    278 		default:
    279 			return nil, errInvalidWord
    280 		}
    281 		n++
    282 	}
    283 
    284 	return dec[:n], nil
    285 }
    286 
    287 // readHexByte returns the byte from its quoted-printable representation.
    288 func readHexByte(a, b byte) (byte, error) {
    289 	var hb, lb byte
    290 	var err error
    291 	if hb, err = fromHex(a); err != nil {
    292 		return 0, err
    293 	}
    294 	if lb, err = fromHex(b); err != nil {
    295 		return 0, err
    296 	}
    297 	return hb<<4 | lb, nil
    298 }
    299 
    300 func fromHex(b byte) (byte, error) {
    301 	switch {
    302 	case b >= '0' && b <= '9':
    303 		return b - '0', nil
    304 	case b >= 'A' && b <= 'F':
    305 		return b - 'A' + 10, nil
    306 	// Accept badly encoded bytes.
    307 	case b >= 'a' && b <= 'f':
    308 		return b - 'a' + 10, nil
    309 	}
    310 	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
    311 }
    312 
    313 var bufPool = sync.Pool{
    314 	New: func() interface{} {
    315 		return new(bytes.Buffer)
    316 	},
    317 }
    318 
    319 func getBuffer() *bytes.Buffer {
    320 	return bufPool.Get().(*bytes.Buffer)
    321 }
    322 
    323 func putBuffer(buf *bytes.Buffer) {
    324 	if buf.Len() > 1024 {
    325 		return
    326 	}
    327 	buf.Reset()
    328 	bufPool.Put(buf)
    329 }
    330