Home | History | Annotate | Download | only in mime
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package mime
      6 
      7 import (
      8 	"bytes"
      9 	"errors"
     10 	"fmt"
     11 	"sort"
     12 	"strings"
     13 	"unicode"
     14 )
     15 
     16 // FormatMediaType serializes mediatype t and the parameters
     17 // param as a media type conforming to RFC 2045 and RFC 2616.
     18 // The type and parameter names are written in lower-case.
     19 // When any of the arguments result in a standard violation then
     20 // FormatMediaType returns the empty string.
     21 func FormatMediaType(t string, param map[string]string) string {
     22 	var b bytes.Buffer
     23 	if slash := strings.Index(t, "/"); slash == -1 {
     24 		if !isToken(t) {
     25 			return ""
     26 		}
     27 		b.WriteString(strings.ToLower(t))
     28 	} else {
     29 		major, sub := t[:slash], t[slash+1:]
     30 		if !isToken(major) || !isToken(sub) {
     31 			return ""
     32 		}
     33 		b.WriteString(strings.ToLower(major))
     34 		b.WriteByte('/')
     35 		b.WriteString(strings.ToLower(sub))
     36 	}
     37 
     38 	attrs := make([]string, 0, len(param))
     39 	for a := range param {
     40 		attrs = append(attrs, a)
     41 	}
     42 	sort.Strings(attrs)
     43 
     44 	for _, attribute := range attrs {
     45 		value := param[attribute]
     46 		b.WriteByte(';')
     47 		b.WriteByte(' ')
     48 		if !isToken(attribute) {
     49 			return ""
     50 		}
     51 		b.WriteString(strings.ToLower(attribute))
     52 		b.WriteByte('=')
     53 		if isToken(value) {
     54 			b.WriteString(value)
     55 			continue
     56 		}
     57 
     58 		b.WriteByte('"')
     59 		offset := 0
     60 		for index, character := range value {
     61 			if character == '"' || character == '\\' {
     62 				b.WriteString(value[offset:index])
     63 				offset = index
     64 				b.WriteByte('\\')
     65 			}
     66 			if character&0x80 != 0 {
     67 				return ""
     68 			}
     69 		}
     70 		b.WriteString(value[offset:])
     71 		b.WriteByte('"')
     72 	}
     73 	return b.String()
     74 }
     75 
     76 func checkMediaTypeDisposition(s string) error {
     77 	typ, rest := consumeToken(s)
     78 	if typ == "" {
     79 		return errors.New("mime: no media type")
     80 	}
     81 	if rest == "" {
     82 		return nil
     83 	}
     84 	if !strings.HasPrefix(rest, "/") {
     85 		return errors.New("mime: expected slash after first token")
     86 	}
     87 	subtype, rest := consumeToken(rest[1:])
     88 	if subtype == "" {
     89 		return errors.New("mime: expected token after slash")
     90 	}
     91 	if rest != "" {
     92 		return errors.New("mime: unexpected content after media subtype")
     93 	}
     94 	return nil
     95 }
     96 
// ErrInvalidMediaParameter is returned by ParseMediaType if
// the media type value was found but there was an error parsing
// the optional parameters. ParseMediaType returns the media type
// together with this error, and a nil parameter map.
var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
    101 
    102 // ParseMediaType parses a media type value and any optional
    103 // parameters, per RFC 1521.  Media types are the values in
    104 // Content-Type and Content-Disposition headers (RFC 2183).
    105 // On success, ParseMediaType returns the media type converted
    106 // to lowercase and trimmed of white space and a non-nil map.
    107 // If there is an error parsing the optional parameter,
    108 // the media type will be returned along with the error
    109 // ErrInvalidMediaParameter.
    110 // The returned map, params, maps from the lowercase
    111 // attribute to the attribute value with its case preserved.
    112 func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
    113 	i := strings.Index(v, ";")
    114 	if i == -1 {
    115 		i = len(v)
    116 	}
    117 	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
    118 
    119 	err = checkMediaTypeDisposition(mediatype)
    120 	if err != nil {
    121 		return "", nil, err
    122 	}
    123 
    124 	params = make(map[string]string)
    125 
    126 	// Map of base parameter name -> parameter name -> value
    127 	// for parameters containing a '*' character.
    128 	// Lazily initialized.
    129 	var continuation map[string]map[string]string
    130 
    131 	v = v[i:]
    132 	for len(v) > 0 {
    133 		v = strings.TrimLeftFunc(v, unicode.IsSpace)
    134 		if len(v) == 0 {
    135 			break
    136 		}
    137 		key, value, rest := consumeMediaParam(v)
    138 		if key == "" {
    139 			if strings.TrimSpace(rest) == ";" {
    140 				// Ignore trailing semicolons.
    141 				// Not an error.
    142 				return
    143 			}
    144 			// Parse error.
    145 			return mediatype, nil, ErrInvalidMediaParameter
    146 		}
    147 
    148 		pmap := params
    149 		if idx := strings.Index(key, "*"); idx != -1 {
    150 			baseName := key[:idx]
    151 			if continuation == nil {
    152 				continuation = make(map[string]map[string]string)
    153 			}
    154 			var ok bool
    155 			if pmap, ok = continuation[baseName]; !ok {
    156 				continuation[baseName] = make(map[string]string)
    157 				pmap = continuation[baseName]
    158 			}
    159 		}
    160 		if _, exists := pmap[key]; exists {
    161 			// Duplicate parameter name is bogus.
    162 			return "", nil, errors.New("mime: duplicate parameter name")
    163 		}
    164 		pmap[key] = value
    165 		v = rest
    166 	}
    167 
    168 	// Stitch together any continuations or things with stars
    169 	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
    170 	var buf bytes.Buffer
    171 	for key, pieceMap := range continuation {
    172 		singlePartKey := key + "*"
    173 		if v, ok := pieceMap[singlePartKey]; ok {
    174 			if decv, ok := decode2231Enc(v); ok {
    175 				params[key] = decv
    176 			}
    177 			continue
    178 		}
    179 
    180 		buf.Reset()
    181 		valid := false
    182 		for n := 0; ; n++ {
    183 			simplePart := fmt.Sprintf("%s*%d", key, n)
    184 			if v, ok := pieceMap[simplePart]; ok {
    185 				valid = true
    186 				buf.WriteString(v)
    187 				continue
    188 			}
    189 			encodedPart := simplePart + "*"
    190 			v, ok := pieceMap[encodedPart]
    191 			if !ok {
    192 				break
    193 			}
    194 			valid = true
    195 			if n == 0 {
    196 				if decv, ok := decode2231Enc(v); ok {
    197 					buf.WriteString(decv)
    198 				}
    199 			} else {
    200 				decv, _ := percentHexUnescape(v)
    201 				buf.WriteString(decv)
    202 			}
    203 		}
    204 		if valid {
    205 			params[key] = buf.String()
    206 		}
    207 	}
    208 
    209 	return
    210 }
    211 
    212 func decode2231Enc(v string) (string, bool) {
    213 	sv := strings.SplitN(v, "'", 3)
    214 	if len(sv) != 3 {
    215 		return "", false
    216 	}
    217 	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
    218 	// need to decide how to expose it in the API. But I'm not sure
    219 	// anybody uses it in practice.
    220 	charset := strings.ToLower(sv[0])
    221 	if len(charset) == 0 {
    222 		return "", false
    223 	}
    224 	if charset != "us-ascii" && charset != "utf-8" {
    225 		// TODO: unsupported encoding
    226 		return "", false
    227 	}
    228 	encv, err := percentHexUnescape(sv[2])
    229 	if err != nil {
    230 		return "", false
    231 	}
    232 	return encv, true
    233 }
    234 
    235 func isNotTokenChar(r rune) bool {
    236 	return !isTokenChar(r)
    237 }
    238 
    239 // consumeToken consumes a token from the beginning of provided
    240 // string, per RFC 2045 section 5.1 (referenced from 2183), and return
    241 // the token consumed and the rest of the string. Returns ("", v) on
    242 // failure to consume at least one character.
    243 func consumeToken(v string) (token, rest string) {
    244 	notPos := strings.IndexFunc(v, isNotTokenChar)
    245 	if notPos == -1 {
    246 		return v, ""
    247 	}
    248 	if notPos == 0 {
    249 		return "", v
    250 	}
    251 	return v[0:notPos], v[notPos:]
    252 }
    253 
    254 // consumeValue consumes a "value" per RFC 2045, where a value is
    255 // either a 'token' or a 'quoted-string'.  On success, consumeValue
    256 // returns the value consumed (and de-quoted/escaped, if a
    257 // quoted-string) and the rest of the string. On failure, returns
    258 // ("", v).
    259 func consumeValue(v string) (value, rest string) {
    260 	if v == "" {
    261 		return
    262 	}
    263 	if v[0] != '"' {
    264 		return consumeToken(v)
    265 	}
    266 
    267 	// parse a quoted-string
    268 	buffer := new(bytes.Buffer)
    269 	for i := 1; i < len(v); i++ {
    270 		r := v[i]
    271 		if r == '"' {
    272 			return buffer.String(), v[i+1:]
    273 		}
    274 		// When MSIE sends a full file path (in "intranet mode"), it does not
    275 		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
    276 		//
    277 		// No known MIME generators emit unnecessary backslash escapes
    278 		// for simple token characters like numbers and letters.
    279 		//
    280 		// If we see an unnecessary backslash escape, assume it is from MSIE
    281 		// and intended as a literal backslash. This makes Go servers deal better
    282 		// with MSIE without affecting the way they handle conforming MIME
    283 		// generators.
    284 		if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
    285 			buffer.WriteByte(v[i+1])
    286 			i++
    287 			continue
    288 		}
    289 		if r == '\r' || r == '\n' {
    290 			return "", v
    291 		}
    292 		buffer.WriteByte(v[i])
    293 	}
    294 	// Did not find end quote.
    295 	return "", v
    296 }
    297 
    298 func consumeMediaParam(v string) (param, value, rest string) {
    299 	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
    300 	if !strings.HasPrefix(rest, ";") {
    301 		return "", "", v
    302 	}
    303 
    304 	rest = rest[1:] // consume semicolon
    305 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
    306 	param, rest = consumeToken(rest)
    307 	param = strings.ToLower(param)
    308 	if param == "" {
    309 		return "", "", v
    310 	}
    311 
    312 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
    313 	if !strings.HasPrefix(rest, "=") {
    314 		return "", "", v
    315 	}
    316 	rest = rest[1:] // consume equals sign
    317 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
    318 	value, rest2 := consumeValue(rest)
    319 	if value == "" && rest2 == rest {
    320 		return "", "", v
    321 	}
    322 	rest = rest2
    323 	return param, value, rest
    324 }
    325 
    326 func percentHexUnescape(s string) (string, error) {
    327 	// Count %, check that they're well-formed.
    328 	percents := 0
    329 	for i := 0; i < len(s); {
    330 		if s[i] != '%' {
    331 			i++
    332 			continue
    333 		}
    334 		percents++
    335 		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
    336 			s = s[i:]
    337 			if len(s) > 3 {
    338 				s = s[0:3]
    339 			}
    340 			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
    341 		}
    342 		i += 3
    343 	}
    344 	if percents == 0 {
    345 		return s, nil
    346 	}
    347 
    348 	t := make([]byte, len(s)-2*percents)
    349 	j := 0
    350 	for i := 0; i < len(s); {
    351 		switch s[i] {
    352 		case '%':
    353 			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
    354 			j++
    355 			i += 3
    356 		default:
    357 			t[j] = s[i]
    358 			j++
    359 			i++
    360 		}
    361 	}
    362 	return string(t), nil
    363 }
    364 
    365 func ishex(c byte) bool {
    366 	switch {
    367 	case '0' <= c && c <= '9':
    368 		return true
    369 	case 'a' <= c && c <= 'f':
    370 		return true
    371 	case 'A' <= c && c <= 'F':
    372 		return true
    373 	}
    374 	return false
    375 }
    376 
    377 func unhex(c byte) byte {
    378 	switch {
    379 	case '0' <= c && c <= '9':
    380 		return c - '0'
    381 	case 'a' <= c && c <= 'f':
    382 		return c - 'a' + 10
    383 	case 'A' <= c && c <= 'F':
    384 		return c - 'A' + 10
    385 	}
    386 	return 0
    387 }
    388