1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package mime 6 7 import ( 8 "bytes" 9 "encoding/base64" 10 "errors" 11 "fmt" 12 "io" 13 "strings" 14 "sync" 15 "unicode" 16 "unicode/utf8" 17 ) 18 19 // A WordEncoder is a RFC 2047 encoded-word encoder. 20 type WordEncoder byte 21 22 const ( 23 // BEncoding represents Base64 encoding scheme as defined by RFC 2045. 24 BEncoding = WordEncoder('b') 25 // QEncoding represents the Q-encoding scheme as defined by RFC 2047. 26 QEncoding = WordEncoder('q') 27 ) 28 29 var ( 30 errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word") 31 ) 32 33 // Encode returns the encoded-word form of s. If s is ASCII without special 34 // characters, it is returned unchanged. The provided charset is the IANA 35 // charset name of s. It is case insensitive. 36 func (e WordEncoder) Encode(charset, s string) string { 37 if !needsEncoding(s) { 38 return s 39 } 40 return e.encodeWord(charset, s) 41 } 42 43 func needsEncoding(s string) bool { 44 for _, b := range s { 45 if (b < ' ' || b > '~') && b != '\t' { 46 return true 47 } 48 } 49 return false 50 } 51 52 // encodeWord encodes a string into an encoded-word. 53 func (e WordEncoder) encodeWord(charset, s string) string { 54 buf := getBuffer() 55 defer putBuffer(buf) 56 57 buf.WriteString("=?") 58 buf.WriteString(charset) 59 buf.WriteByte('?') 60 buf.WriteByte(byte(e)) 61 buf.WriteByte('?') 62 63 if e == BEncoding { 64 w := base64.NewEncoder(base64.StdEncoding, buf) 65 io.WriteString(w, s) 66 w.Close() 67 } else { 68 enc := make([]byte, 3) 69 for i := 0; i < len(s); i++ { 70 b := s[i] 71 switch { 72 case b == ' ': 73 buf.WriteByte('_') 74 case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_': 75 buf.WriteByte(b) 76 default: 77 enc[0] = '=' 78 enc[1] = upperhex[b>>4] 79 enc[2] = upperhex[b&0x0f] 80 buf.Write(enc) 81 } 82 } 83 } 84 buf.WriteString("?=") 85 return buf.String() 86 } 87 88 const upperhex = "0123456789ABCDEF" 89 90 // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words. 91 type WordDecoder struct { 92 // CharsetReader, if non-nil, defines a function to generate 93 // charset-conversion readers, converting from the provided 94 // charset into UTF-8. 95 // Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets 96 // are handled by default. 97 // One of the the CharsetReader's result values must be non-nil. 98 CharsetReader func(charset string, input io.Reader) (io.Reader, error) 99 } 100 101 // Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word, 102 // word is returned unchanged. 103 func (d *WordDecoder) Decode(word string) (string, error) { 104 fields := strings.Split(word, "?") // TODO: remove allocation? 105 if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 { 106 return "", errInvalidWord 107 } 108 109 content, err := decode(fields[2][0], fields[3]) 110 if err != nil { 111 return "", err 112 } 113 114 buf := getBuffer() 115 defer putBuffer(buf) 116 117 if err := d.convert(buf, fields[1], content); err != nil { 118 return "", err 119 } 120 121 return buf.String(), nil 122 } 123 124 // DecodeHeader decodes all encoded-words of the given string. It returns an 125 // error if and only if CharsetReader of d returns an error. 126 func (d *WordDecoder) DecodeHeader(header string) (string, error) { 127 // If there is no encoded-word, returns before creating a buffer. 128 i := strings.Index(header, "=?") 129 if i == -1 { 130 return header, nil 131 } 132 133 buf := getBuffer() 134 defer putBuffer(buf) 135 136 buf.WriteString(header[:i]) 137 header = header[i:] 138 139 betweenWords := false 140 for { 141 start := strings.Index(header, "=?") 142 if start == -1 { 143 break 144 } 145 cur := start + len("=?") 146 147 i := strings.Index(header[cur:], "?") 148 if i == -1 { 149 break 150 } 151 charset := header[cur : cur+i] 152 cur += i + len("?") 153 154 if len(header) < cur+len("Q??=") { 155 break 156 } 157 encoding := header[cur] 158 cur++ 159 160 if header[cur] != '?' { 161 break 162 } 163 cur++ 164 165 j := strings.Index(header[cur:], "?=") 166 if j == -1 { 167 break 168 } 169 text := header[cur : cur+j] 170 end := cur + j + len("?=") 171 172 content, err := decode(encoding, text) 173 if err != nil { 174 betweenWords = false 175 buf.WriteString(header[:start+2]) 176 header = header[start+2:] 177 continue 178 } 179 180 // Write characters before the encoded-word. White-space and newline 181 // characters separating two encoded-words must be deleted. 182 if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { 183 buf.WriteString(header[:start]) 184 } 185 186 if err := d.convert(buf, charset, content); err != nil { 187 return "", err 188 } 189 190 header = header[end:] 191 betweenWords = true 192 } 193 194 if len(header) > 0 { 195 buf.WriteString(header) 196 } 197 198 return buf.String(), nil 199 } 200 201 func decode(encoding byte, text string) ([]byte, error) { 202 switch encoding { 203 case 'B', 'b': 204 return base64.StdEncoding.DecodeString(text) 205 case 'Q', 'q': 206 return qDecode(text) 207 default: 208 return nil, errInvalidWord 209 } 210 } 211 212 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error { 213 switch { 214 case strings.EqualFold("utf-8", charset): 215 buf.Write(content) 216 case strings.EqualFold("iso-8859-1", charset): 217 for _, c := range content { 218 buf.WriteRune(rune(c)) 219 } 220 case strings.EqualFold("us-ascii", charset): 221 for _, c := range content { 222 if c >= utf8.RuneSelf { 223 buf.WriteRune(unicode.ReplacementChar) 224 } else { 225 buf.WriteByte(c) 226 } 227 } 228 default: 229 if d.CharsetReader == nil { 230 return fmt.Errorf("mime: unhandled charset %q", charset) 231 } 232 r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) 233 if err != nil { 234 return err 235 } 236 if _, err = buf.ReadFrom(r); err != nil { 237 return err 238 } 239 } 240 return nil 241 } 242 243 // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least 244 // one byte of non-whitespace. 245 func hasNonWhitespace(s string) bool { 246 for _, b := range s { 247 switch b { 248 // Encoded-words can only be separated by linear white spaces which does 249 // not include vertical tabs (\v). 250 case ' ', '\t', '\n', '\r': 251 default: 252 return true 253 } 254 } 255 return false 256 } 257 258 // qDecode decodes a Q encoded string. 259 func qDecode(s string) ([]byte, error) { 260 dec := make([]byte, len(s)) 261 n := 0 262 for i := 0; i < len(s); i++ { 263 switch c := s[i]; { 264 case c == '_': 265 dec[n] = ' ' 266 case c == '=': 267 if i+2 >= len(s) { 268 return nil, errInvalidWord 269 } 270 b, err := readHexByte(s[i+1], s[i+2]) 271 if err != nil { 272 return nil, err 273 } 274 dec[n] = b 275 i += 2 276 case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': 277 dec[n] = c 278 default: 279 return nil, errInvalidWord 280 } 281 n++ 282 } 283 284 return dec[:n], nil 285 } 286 287 // readHexByte returns the byte from its quoted-printable representation. 288 func readHexByte(a, b byte) (byte, error) { 289 var hb, lb byte 290 var err error 291 if hb, err = fromHex(a); err != nil { 292 return 0, err 293 } 294 if lb, err = fromHex(b); err != nil { 295 return 0, err 296 } 297 return hb<<4 | lb, nil 298 } 299 300 func fromHex(b byte) (byte, error) { 301 switch { 302 case b >= '0' && b <= '9': 303 return b - '0', nil 304 case b >= 'A' && b <= 'F': 305 return b - 'A' + 10, nil 306 // Accept badly encoded bytes. 307 case b >= 'a' && b <= 'f': 308 return b - 'a' + 10, nil 309 } 310 return 0, fmt.Errorf("mime: invalid hex byte %#02x", b) 311 } 312 313 var bufPool = sync.Pool{ 314 New: func() interface{} { 315 return new(bytes.Buffer) 316 }, 317 } 318 319 func getBuffer() *bytes.Buffer { 320 return bufPool.Get().(*bytes.Buffer) 321 } 322 323 func putBuffer(buf *bytes.Buffer) { 324 if buf.Len() > 1024 { 325 return 326 } 327 buf.Reset() 328 bufPool.Put(buf) 329 } 330