Home | History | Annotate | Download | only in mime
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package mime
      6 
      7 import (
      8 	"bytes"
      9 	"errors"
     10 	"fmt"
     11 	"sort"
     12 	"strings"
     13 	"unicode"
     14 )
     15 
     16 // FormatMediaType serializes mediatype t and the parameters
     17 // param as a media type conforming to RFC 2045 and RFC 2616.
     18 // The type and parameter names are written in lower-case.
     19 // When any of the arguments result in a standard violation then
     20 // FormatMediaType returns the empty string.
     21 func FormatMediaType(t string, param map[string]string) string {
     22 	var b bytes.Buffer
     23 	if slash := strings.Index(t, "/"); slash == -1 {
     24 		if !isToken(t) {
     25 			return ""
     26 		}
     27 		b.WriteString(strings.ToLower(t))
     28 	} else {
     29 		major, sub := t[:slash], t[slash+1:]
     30 		if !isToken(major) || !isToken(sub) {
     31 			return ""
     32 		}
     33 		b.WriteString(strings.ToLower(major))
     34 		b.WriteByte('/')
     35 		b.WriteString(strings.ToLower(sub))
     36 	}
     37 
     38 	attrs := make([]string, 0, len(param))
     39 	for a := range param {
     40 		attrs = append(attrs, a)
     41 	}
     42 	sort.Strings(attrs)
     43 
     44 	for _, attribute := range attrs {
     45 		value := param[attribute]
     46 		b.WriteByte(';')
     47 		b.WriteByte(' ')
     48 		if !isToken(attribute) {
     49 			return ""
     50 		}
     51 		b.WriteString(strings.ToLower(attribute))
     52 		b.WriteByte('=')
     53 		if isToken(value) {
     54 			b.WriteString(value)
     55 			continue
     56 		}
     57 
     58 		b.WriteByte('"')
     59 		offset := 0
     60 		for index, character := range value {
     61 			if character == '"' || character == '\\' {
     62 				b.WriteString(value[offset:index])
     63 				offset = index
     64 				b.WriteByte('\\')
     65 			}
     66 			if character&0x80 != 0 {
     67 				return ""
     68 			}
     69 		}
     70 		b.WriteString(value[offset:])
     71 		b.WriteByte('"')
     72 	}
     73 	return b.String()
     74 }
     75 
     76 func checkMediaTypeDisposition(s string) error {
     77 	typ, rest := consumeToken(s)
     78 	if typ == "" {
     79 		return errors.New("mime: no media type")
     80 	}
     81 	if rest == "" {
     82 		return nil
     83 	}
     84 	if !strings.HasPrefix(rest, "/") {
     85 		return errors.New("mime: expected slash after first token")
     86 	}
     87 	subtype, rest := consumeToken(rest[1:])
     88 	if subtype == "" {
     89 		return errors.New("mime: expected token after slash")
     90 	}
     91 	if rest != "" {
     92 		return errors.New("mime: unexpected content after media subtype")
     93 	}
     94 	return nil
     95 }
     96 
     97 // ParseMediaType parses a media type value and any optional
     98 // parameters, per RFC 1521.  Media types are the values in
     99 // Content-Type and Content-Disposition headers (RFC 2183).
    100 // On success, ParseMediaType returns the media type converted
    101 // to lowercase and trimmed of white space and a non-nil map.
    102 // The returned map, params, maps from the lowercase
    103 // attribute to the attribute value with its case preserved.
    104 func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
    105 	i := strings.Index(v, ";")
    106 	if i == -1 {
    107 		i = len(v)
    108 	}
    109 	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
    110 
    111 	err = checkMediaTypeDisposition(mediatype)
    112 	if err != nil {
    113 		return "", nil, err
    114 	}
    115 
    116 	params = make(map[string]string)
    117 
    118 	// Map of base parameter name -> parameter name -> value
    119 	// for parameters containing a '*' character.
    120 	// Lazily initialized.
    121 	var continuation map[string]map[string]string
    122 
    123 	v = v[i:]
    124 	for len(v) > 0 {
    125 		v = strings.TrimLeftFunc(v, unicode.IsSpace)
    126 		if len(v) == 0 {
    127 			break
    128 		}
    129 		key, value, rest := consumeMediaParam(v)
    130 		if key == "" {
    131 			if strings.TrimSpace(rest) == ";" {
    132 				// Ignore trailing semicolons.
    133 				// Not an error.
    134 				return
    135 			}
    136 			// Parse error.
    137 			return "", nil, errors.New("mime: invalid media parameter")
    138 		}
    139 
    140 		pmap := params
    141 		if idx := strings.Index(key, "*"); idx != -1 {
    142 			baseName := key[:idx]
    143 			if continuation == nil {
    144 				continuation = make(map[string]map[string]string)
    145 			}
    146 			var ok bool
    147 			if pmap, ok = continuation[baseName]; !ok {
    148 				continuation[baseName] = make(map[string]string)
    149 				pmap = continuation[baseName]
    150 			}
    151 		}
    152 		if _, exists := pmap[key]; exists {
    153 			// Duplicate parameter name is bogus.
    154 			return "", nil, errors.New("mime: duplicate parameter name")
    155 		}
    156 		pmap[key] = value
    157 		v = rest
    158 	}
    159 
    160 	// Stitch together any continuations or things with stars
    161 	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
    162 	var buf bytes.Buffer
    163 	for key, pieceMap := range continuation {
    164 		singlePartKey := key + "*"
    165 		if v, ok := pieceMap[singlePartKey]; ok {
    166 			decv := decode2231Enc(v)
    167 			params[key] = decv
    168 			continue
    169 		}
    170 
    171 		buf.Reset()
    172 		valid := false
    173 		for n := 0; ; n++ {
    174 			simplePart := fmt.Sprintf("%s*%d", key, n)
    175 			if v, ok := pieceMap[simplePart]; ok {
    176 				valid = true
    177 				buf.WriteString(v)
    178 				continue
    179 			}
    180 			encodedPart := simplePart + "*"
    181 			if v, ok := pieceMap[encodedPart]; ok {
    182 				valid = true
    183 				if n == 0 {
    184 					buf.WriteString(decode2231Enc(v))
    185 				} else {
    186 					decv, _ := percentHexUnescape(v)
    187 					buf.WriteString(decv)
    188 				}
    189 			} else {
    190 				break
    191 			}
    192 		}
    193 		if valid {
    194 			params[key] = buf.String()
    195 		}
    196 	}
    197 
    198 	return
    199 }
    200 
    201 func decode2231Enc(v string) string {
    202 	sv := strings.SplitN(v, "'", 3)
    203 	if len(sv) != 3 {
    204 		return ""
    205 	}
    206 	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
    207 	// need to decide how to expose it in the API. But I'm not sure
    208 	// anybody uses it in practice.
    209 	charset := strings.ToLower(sv[0])
    210 	if charset != "us-ascii" && charset != "utf-8" {
    211 		// TODO: unsupported encoding
    212 		return ""
    213 	}
    214 	encv, _ := percentHexUnescape(sv[2])
    215 	return encv
    216 }
    217 
    218 func isNotTokenChar(r rune) bool {
    219 	return !isTokenChar(r)
    220 }
    221 
    222 // consumeToken consumes a token from the beginning of provided
    223 // string, per RFC 2045 section 5.1 (referenced from 2183), and return
    224 // the token consumed and the rest of the string. Returns ("", v) on
    225 // failure to consume at least one character.
    226 func consumeToken(v string) (token, rest string) {
    227 	notPos := strings.IndexFunc(v, isNotTokenChar)
    228 	if notPos == -1 {
    229 		return v, ""
    230 	}
    231 	if notPos == 0 {
    232 		return "", v
    233 	}
    234 	return v[0:notPos], v[notPos:]
    235 }
    236 
    237 // consumeValue consumes a "value" per RFC 2045, where a value is
    238 // either a 'token' or a 'quoted-string'.  On success, consumeValue
    239 // returns the value consumed (and de-quoted/escaped, if a
    240 // quoted-string) and the rest of the string. On failure, returns
    241 // ("", v).
    242 func consumeValue(v string) (value, rest string) {
    243 	if v == "" {
    244 		return
    245 	}
    246 	if v[0] != '"' {
    247 		return consumeToken(v)
    248 	}
    249 
    250 	// parse a quoted-string
    251 	buffer := new(bytes.Buffer)
    252 	for i := 1; i < len(v); i++ {
    253 		r := v[i]
    254 		if r == '"' {
    255 			return buffer.String(), v[i+1:]
    256 		}
    257 		// When MSIE sends a full file path (in "intranet mode"), it does not
    258 		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
    259 		//
    260 		// No known MIME generators emit unnecessary backslash escapes
    261 		// for simple token characters like numbers and letters.
    262 		//
    263 		// If we see an unnecessary backslash escape, assume it is from MSIE
    264 		// and intended as a literal backslash. This makes Go servers deal better
    265 		// with MSIE without affecting the way they handle conforming MIME
    266 		// generators.
    267 		if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
    268 			buffer.WriteByte(v[i+1])
    269 			i++
    270 			continue
    271 		}
    272 		if r == '\r' || r == '\n' {
    273 			return "", v
    274 		}
    275 		buffer.WriteByte(v[i])
    276 	}
    277 	// Did not find end quote.
    278 	return "", v
    279 }
    280 
    281 func consumeMediaParam(v string) (param, value, rest string) {
    282 	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
    283 	if !strings.HasPrefix(rest, ";") {
    284 		return "", "", v
    285 	}
    286 
    287 	rest = rest[1:] // consume semicolon
    288 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
    289 	param, rest = consumeToken(rest)
    290 	param = strings.ToLower(param)
    291 	if param == "" {
    292 		return "", "", v
    293 	}
    294 
    295 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
    296 	if !strings.HasPrefix(rest, "=") {
    297 		return "", "", v
    298 	}
    299 	rest = rest[1:] // consume equals sign
    300 	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
    301 	value, rest2 := consumeValue(rest)
    302 	if value == "" && rest2 == rest {
    303 		return "", "", v
    304 	}
    305 	rest = rest2
    306 	return param, value, rest
    307 }
    308 
    309 func percentHexUnescape(s string) (string, error) {
    310 	// Count %, check that they're well-formed.
    311 	percents := 0
    312 	for i := 0; i < len(s); {
    313 		if s[i] != '%' {
    314 			i++
    315 			continue
    316 		}
    317 		percents++
    318 		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
    319 			s = s[i:]
    320 			if len(s) > 3 {
    321 				s = s[0:3]
    322 			}
    323 			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
    324 		}
    325 		i += 3
    326 	}
    327 	if percents == 0 {
    328 		return s, nil
    329 	}
    330 
    331 	t := make([]byte, len(s)-2*percents)
    332 	j := 0
    333 	for i := 0; i < len(s); {
    334 		switch s[i] {
    335 		case '%':
    336 			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
    337 			j++
    338 			i += 3
    339 		default:
    340 			t[j] = s[i]
    341 			j++
    342 			i++
    343 		}
    344 	}
    345 	return string(t), nil
    346 }
    347 
    348 func ishex(c byte) bool {
    349 	switch {
    350 	case '0' <= c && c <= '9':
    351 		return true
    352 	case 'a' <= c && c <= 'f':
    353 		return true
    354 	case 'A' <= c && c <= 'F':
    355 		return true
    356 	}
    357 	return false
    358 }
    359 
    360 func unhex(c byte) byte {
    361 	switch {
    362 	case '0' <= c && c <= '9':
    363 		return c - '0'
    364 	case 'a' <= c && c <= 'f':
    365 		return c - 'a' + 10
    366 	case 'A' <= c && c <= 'F':
    367 		return c - 'A' + 10
    368 	}
    369 	return 0
    370 }
    371