Home | History | Annotate | Download | only in mime
      1 // Copyright 2010 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package mime
      6 
      7 import (
      8 	"reflect"
      9 	"testing"
     10 )
     11 
     12 func TestConsumeToken(t *testing.T) {
     13 	tests := [...][3]string{
     14 		{"foo bar", "foo", " bar"},
     15 		{"bar", "bar", ""},
     16 		{"", "", ""},
     17 		{" foo", "", " foo"},
     18 	}
     19 	for _, test := range tests {
     20 		token, rest := consumeToken(test[0])
     21 		expectedToken := test[1]
     22 		expectedRest := test[2]
     23 		if token != expectedToken {
     24 			t.Errorf("expected to consume token '%s', not '%s' from '%s'",
     25 				expectedToken, token, test[0])
     26 		} else if rest != expectedRest {
     27 			t.Errorf("expected to have left '%s', not '%s' after reading token '%s' from '%s'",
     28 				expectedRest, rest, token, test[0])
     29 		}
     30 	}
     31 }
     32 
     33 func TestConsumeValue(t *testing.T) {
     34 	tests := [...][3]string{
     35 		{"foo bar", "foo", " bar"},
     36 		{"bar", "bar", ""},
     37 		{" bar ", "", " bar "},
     38 		{`"My value"end`, "My value", "end"},
     39 		{`"My value" end`, "My value", " end"},
     40 		{`"\\" rest`, "\\", " rest"},
     41 		{`"My \" value"end`, "My \" value", "end"},
     42 		{`"\" rest`, "", `"\" rest`},
     43 	}
     44 	for _, test := range tests {
     45 		value, rest := consumeValue(test[0])
     46 		expectedValue := test[1]
     47 		expectedRest := test[2]
     48 		if value != expectedValue {
     49 			t.Errorf("expected to consume value [%s], not [%s] from [%s]",
     50 				expectedValue, value, test[0])
     51 		} else if rest != expectedRest {
     52 			t.Errorf("expected to have left [%s], not [%s] after reading value [%s] from [%s]",
     53 				expectedRest, rest, value, test[0])
     54 		}
     55 	}
     56 }
     57 
     58 func TestConsumeMediaParam(t *testing.T) {
     59 	tests := [...][4]string{
     60 		{" ; foo=bar", "foo", "bar", ""},
     61 		{"; foo=bar", "foo", "bar", ""},
     62 		{";foo=bar", "foo", "bar", ""},
     63 		{";FOO=bar", "foo", "bar", ""},
     64 		{`;foo="bar"`, "foo", "bar", ""},
     65 		{`;foo="bar"; `, "foo", "bar", "; "},
     66 		{`;foo="bar"; foo=baz`, "foo", "bar", "; foo=baz"},
     67 		{` ; boundary=----CUT;`, "boundary", "----CUT", ";"},
     68 		{` ; key=value;  blah="value";name="foo" `, "key", "value", `;  blah="value";name="foo" `},
     69 		{`;  blah="value";name="foo" `, "blah", "value", `;name="foo" `},
     70 		{`;name="foo" `, "name", "foo", ` `},
     71 	}
     72 	for _, test := range tests {
     73 		param, value, rest := consumeMediaParam(test[0])
     74 		expectedParam := test[1]
     75 		expectedValue := test[2]
     76 		expectedRest := test[3]
     77 		if param != expectedParam {
     78 			t.Errorf("expected to consume param [%s], not [%s] from [%s]",
     79 				expectedParam, param, test[0])
     80 		} else if value != expectedValue {
     81 			t.Errorf("expected to consume value [%s], not [%s] from [%s]",
     82 				expectedValue, value, test[0])
     83 		} else if rest != expectedRest {
     84 			t.Errorf("expected to have left [%s], not [%s] after reading [%s/%s] from [%s]",
     85 				expectedRest, rest, param, value, test[0])
     86 		}
     87 	}
     88 }
     89 
     90 type mediaTypeTest struct {
     91 	in string
     92 	t  string
     93 	p  map[string]string
     94 }
     95 
     96 func TestParseMediaType(t *testing.T) {
     97 	// Convenience map initializer
     98 	m := func(s ...string) map[string]string {
     99 		sm := make(map[string]string)
    100 		for i := 0; i < len(s); i += 2 {
    101 			sm[s[i]] = s[i+1]
    102 		}
    103 		return sm
    104 	}
    105 
    106 	nameFoo := map[string]string{"name": "foo"}
    107 	tests := []mediaTypeTest{
    108 		{`form-data; name="foo"`, "form-data", nameFoo},
    109 		{` form-data ; name=foo`, "form-data", nameFoo},
    110 		{`FORM-DATA;name="foo"`, "form-data", nameFoo},
    111 		{` FORM-DATA ; name="foo"`, "form-data", nameFoo},
    112 		{` FORM-DATA ; name="foo"`, "form-data", nameFoo},
    113 
    114 		{`form-data; key=value;  blah="value";name="foo" `,
    115 			"form-data",
    116 			m("key", "value", "blah", "value", "name", "foo")},
    117 
    118 		{`foo; key=val1; key=the-key-appears-again-which-is-bogus`,
    119 			"", m()},
    120 
    121 		// From RFC 2231:
    122 		{`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`,
    123 			"application/x-stuff",
    124 			m("title", "This is ***fun***")},
    125 
    126 		{`message/external-body; access-type=URL; ` +
    127 			`URL*0="ftp://";` +
    128 			`URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`,
    129 			"message/external-body",
    130 			m("access-type", "URL",
    131 				"url", "ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar")},
    132 
    133 		{`application/x-stuff; ` +
    134 			`title*0*=us-ascii'en'This%20is%20even%20more%20; ` +
    135 			`title*1*=%2A%2A%2Afun%2A%2A%2A%20; ` +
    136 			`title*2="isn't it!"`,
    137 			"application/x-stuff",
    138 			m("title", "This is even more ***fun*** isn't it!")},
    139 
    140 		// Tests from http://greenbytes.de/tech/tc2231/
    141 		// Note: Backslash escape handling is a bit loose, like MSIE.
    142 
    143 		// #attonly
    144 		{`attachment`,
    145 			"attachment",
    146 			m()},
    147 		// #attonlyucase
    148 		{`ATTACHMENT`,
    149 			"attachment",
    150 			m()},
    151 		// #attwithasciifilename
    152 		{`attachment; filename="foo.html"`,
    153 			"attachment",
    154 			m("filename", "foo.html")},
    155 		// #attwithasciifilename25
    156 		{`attachment; filename="0000000000111111111122222"`,
    157 			"attachment",
    158 			m("filename", "0000000000111111111122222")},
    159 		// #attwithasciifilename35
    160 		{`attachment; filename="00000000001111111111222222222233333"`,
    161 			"attachment",
    162 			m("filename", "00000000001111111111222222222233333")},
    163 		// #attwithasciifnescapedchar
    164 		{`attachment; filename="f\oo.html"`,
    165 			"attachment",
    166 			m("filename", "f\\oo.html")},
    167 		// #attwithasciifnescapedquote
    168 		{`attachment; filename="\"quoting\" tested.html"`,
    169 			"attachment",
    170 			m("filename", `"quoting" tested.html`)},
    171 		// #attwithquotedsemicolon
    172 		{`attachment; filename="Here's a semicolon;.html"`,
    173 			"attachment",
    174 			m("filename", "Here's a semicolon;.html")},
    175 		// #attwithfilenameandextparam
    176 		{`attachment; foo="bar"; filename="foo.html"`,
    177 			"attachment",
    178 			m("foo", "bar", "filename", "foo.html")},
    179 		// #attwithfilenameandextparamescaped
    180 		{`attachment; foo="\"\\";filename="foo.html"`,
    181 			"attachment",
    182 			m("foo", "\"\\", "filename", "foo.html")},
    183 		// #attwithasciifilenameucase
    184 		{`attachment; FILENAME="foo.html"`,
    185 			"attachment",
    186 			m("filename", "foo.html")},
    187 		// #attwithasciifilenamenq
    188 		{`attachment; filename=foo.html`,
    189 			"attachment",
    190 			m("filename", "foo.html")},
    191 		// #attwithasciifilenamenqs
    192 		{`attachment; filename=foo.html ;`,
    193 			"attachment",
    194 			m("filename", "foo.html")},
    195 		// #attwithfntokensq
    196 		{`attachment; filename='foo.html'`,
    197 			"attachment",
    198 			m("filename", "'foo.html'")},
    199 		// #attwithisofnplain
    200 		{`attachment; filename="foo-.html"`,
    201 			"attachment",
    202 			m("filename", "foo-.html")},
    203 		// #attwithutf8fnplain
    204 		{`attachment; filename="foo-.html"`,
    205 			"attachment",
    206 			m("filename", "foo-.html")},
    207 		// #attwithfnrawpctenca
    208 		{`attachment; filename="foo-%41.html"`,
    209 			"attachment",
    210 			m("filename", "foo-%41.html")},
    211 		// #attwithfnusingpct
    212 		{`attachment; filename="50%.html"`,
    213 			"attachment",
    214 			m("filename", "50%.html")},
    215 		// #attwithfnrawpctencaq
    216 		{`attachment; filename="foo-%\41.html"`,
    217 			"attachment",
    218 			m("filename", "foo-%\\41.html")},
    219 		// #attwithnamepct
    220 		{`attachment; name="foo-%41.html"`,
    221 			"attachment",
    222 			m("name", "foo-%41.html")},
    223 		// #attwithfilenamepctandiso
    224 		{`attachment; name="-%41.html"`,
    225 			"attachment",
    226 			m("name", "-%41.html")},
    227 		// #attwithfnrawpctenclong
    228 		{`attachment; filename="foo-%c3%a4-%e2%82%ac.html"`,
    229 			"attachment",
    230 			m("filename", "foo-%c3%a4-%e2%82%ac.html")},
    231 		// #attwithasciifilenamews1
    232 		{`attachment; filename ="foo.html"`,
    233 			"attachment",
    234 			m("filename", "foo.html")},
    235 		// #attmissingdisposition
    236 		{`filename=foo.html`,
    237 			"", m()},
    238 		// #attmissingdisposition2
    239 		{`x=y; filename=foo.html`,
    240 			"", m()},
    241 		// #attmissingdisposition3
    242 		{`"foo; filename=bar;baz"; filename=qux`,
    243 			"", m()},
    244 		// #attmissingdisposition4
    245 		{`filename=foo.html, filename=bar.html`,
    246 			"", m()},
    247 		// #emptydisposition
    248 		{`; filename=foo.html`,
    249 			"", m()},
    250 		// #doublecolon
    251 		{`: inline; attachment; filename=foo.html`,
    252 			"", m()},
    253 		// #attandinline
    254 		{`inline; attachment; filename=foo.html`,
    255 			"", m()},
    256 		// #attandinline2
    257 		{`attachment; inline; filename=foo.html`,
    258 			"", m()},
    259 		// #attbrokenquotedfn
    260 		{`attachment; filename="foo.html".txt`,
    261 			"", m()},
    262 		// #attbrokenquotedfn2
    263 		{`attachment; filename="bar`,
    264 			"", m()},
    265 		// #attbrokenquotedfn3
    266 		{`attachment; filename=foo"bar;baz"qux`,
    267 			"", m()},
    268 		// #attmultinstances
    269 		{`attachment; filename=foo.html, attachment; filename=bar.html`,
    270 			"", m()},
    271 		// #attmissingdelim
    272 		{`attachment; foo=foo filename=bar`,
    273 			"", m()},
    274 		// #attmissingdelim2
    275 		{`attachment; filename=bar foo=foo`,
    276 			"", m()},
    277 		// #attmissingdelim3
    278 		{`attachment filename=bar`,
    279 			"", m()},
    280 		// #attreversed
    281 		{`filename=foo.html; attachment`,
    282 			"", m()},
    283 		// #attconfusedparam
    284 		{`attachment; xfilename=foo.html`,
    285 			"attachment",
    286 			m("xfilename", "foo.html")},
    287 		// #attcdate
    288 		{`attachment; creation-date="Wed, 12 Feb 1997 16:29:51 -0500"`,
    289 			"attachment",
    290 			m("creation-date", "Wed, 12 Feb 1997 16:29:51 -0500")},
    291 		// #attmdate
    292 		{`attachment; modification-date="Wed, 12 Feb 1997 16:29:51 -0500"`,
    293 			"attachment",
    294 			m("modification-date", "Wed, 12 Feb 1997 16:29:51 -0500")},
    295 		// #dispext
    296 		{`foobar`, "foobar", m()},
    297 		// #dispextbadfn
    298 		{`attachment; example="filename=example.txt"`,
    299 			"attachment",
    300 			m("example", "filename=example.txt")},
    301 		// #attwithfn2231utf8
    302 		{`attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html`,
    303 			"attachment",
    304 			m("filename", "foo--.html")},
    305 		// #attwithfn2231noc
    306 		{`attachment; filename*=''foo-%c3%a4-%e2%82%ac.html`,
    307 			"attachment",
    308 			m()},
    309 		// #attwithfn2231utf8comp
    310 		{`attachment; filename*=UTF-8''foo-a%cc%88.html`,
    311 			"attachment",
    312 			m("filename", "foo-a.html")},
    313 		// #attwithfn2231ws2
    314 		{`attachment; filename*= UTF-8''foo-%c3%a4.html`,
    315 			"attachment",
    316 			m("filename", "foo-.html")},
    317 		// #attwithfn2231ws3
    318 		{`attachment; filename* =UTF-8''foo-%c3%a4.html`,
    319 			"attachment",
    320 			m("filename", "foo-.html")},
    321 		// #attwithfn2231quot
    322 		{`attachment; filename*="UTF-8''foo-%c3%a4.html"`,
    323 			"attachment",
    324 			m("filename", "foo-.html")},
    325 		// #attwithfn2231quot2
    326 		{`attachment; filename*="foo%20bar.html"`,
    327 			"attachment",
    328 			m()},
    329 		// #attwithfn2231singleqmissing
    330 		{`attachment; filename*=UTF-8'foo-%c3%a4.html`,
    331 			"attachment",
    332 			m()},
    333 		// #attwithfn2231nbadpct1
    334 		{`attachment; filename*=UTF-8''foo%`,
    335 			"attachment",
    336 			m()},
    337 		// #attwithfn2231nbadpct2
    338 		{`attachment; filename*=UTF-8''f%oo.html`,
    339 			"attachment",
    340 			m()},
    341 		// #attwithfn2231dpct
    342 		{`attachment; filename*=UTF-8''A-%2541.html`,
    343 			"attachment",
    344 			m("filename", "A-%41.html")},
    345 		// #attfncont
    346 		{`attachment; filename*0="foo."; filename*1="html"`,
    347 			"attachment",
    348 			m("filename", "foo.html")},
    349 		// #attfncontenc
    350 		{`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`,
    351 			"attachment",
    352 			m("filename", "foo-.html")},
    353 		// #attfncontlz
    354 		{`attachment; filename*0="foo"; filename*01="bar"`,
    355 			"attachment",
    356 			m("filename", "foo")},
    357 		// #attfncontnc
    358 		{`attachment; filename*0="foo"; filename*2="bar"`,
    359 			"attachment",
    360 			m("filename", "foo")},
    361 		// #attfnconts1
    362 		{`attachment; filename*1="foo."; filename*2="html"`,
    363 			"attachment", m()},
    364 		// #attfncontord
    365 		{`attachment; filename*1="bar"; filename*0="foo"`,
    366 			"attachment",
    367 			m("filename", "foobar")},
    368 		// #attfnboth
    369 		{`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`,
    370 			"attachment",
    371 			m("filename", "foo-.html")},
    372 		// #attfnboth2
    373 		{`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`,
    374 			"attachment",
    375 			m("filename", "foo-.html")},
    376 		// #attfnboth3
    377 		{`attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4`,
    378 			"attachment",
    379 			m()},
    380 		// #attnewandfn
    381 		{`attachment; foobar=x; filename="foo.html"`,
    382 			"attachment",
    383 			m("foobar", "x", "filename", "foo.html")},
    384 
    385 		// Browsers also just send UTF-8 directly without RFC 2231,
    386 		// at least when the source page is served with UTF-8.
    387 		{`form-data; firstname=""; lastname=""`,
    388 			"form-data",
    389 			m("firstname", "", "lastname", "")},
    390 
    391 		// Empty string used to be mishandled.
    392 		{`foo; bar=""`, "foo", m("bar", "")},
    393 
    394 		// Microsoft browers in intranet mode do not think they need to escape \ in file name.
    395 		{`form-data; name="file"; filename="C:\dev\go\robots.txt"`, "form-data", m("name", "file", "filename", `C:\dev\go\robots.txt`)},
    396 	}
    397 	for _, test := range tests {
    398 		mt, params, err := ParseMediaType(test.in)
    399 		if err != nil {
    400 			if test.t != "" {
    401 				t.Errorf("for input %#q, unexpected error: %v", test.in, err)
    402 				continue
    403 			}
    404 			continue
    405 		}
    406 		if g, e := mt, test.t; g != e {
    407 			t.Errorf("for input %#q, expected type %q, got %q",
    408 				test.in, e, g)
    409 			continue
    410 		}
    411 		if len(params) == 0 && len(test.p) == 0 {
    412 			continue
    413 		}
    414 		if !reflect.DeepEqual(params, test.p) {
    415 			t.Errorf("for input %#q, wrong params.\n"+
    416 				"expected: %#v\n"+
    417 				"     got: %#v",
    418 				test.in, test.p, params)
    419 		}
    420 	}
    421 }
    422 
    423 type badMediaTypeTest struct {
    424 	in  string
    425 	mt  string
    426 	err string
    427 }
    428 
    429 var badMediaTypeTests = []badMediaTypeTest{
    430 	{"bogus ;=========", "bogus", "mime: invalid media parameter"},
    431 	// The following example is from real email delivered by gmail (error: missing semicolon)
    432 	// and it is there to check behavior described in #19498
    433 	{"application/pdf; x-mac-type=\"3F3F3F3F\"; x-mac-creator=\"3F3F3F3F\" name=\"a.pdf\";",
    434 		"application/pdf", "mime: invalid media parameter"},
    435 	{"bogus/<script>alert</script>", "", "mime: expected token after slash"},
    436 	{"bogus/bogus<script>alert</script>", "", "mime: unexpected content after media subtype"},
    437 	// Tests from http://greenbytes.de/tech/tc2231/
    438 	{`"attachment"`, "attachment", "mime: no media type"},
    439 	{"attachment; filename=foo,bar.html", "attachment", "mime: invalid media parameter"},
    440 	{"attachment; ;filename=foo", "attachment", "mime: invalid media parameter"},
    441 	{"attachment; filename=foo bar.html", "attachment", "mime: invalid media parameter"},
    442 	{`attachment; filename="foo.html"; filename="bar.html"`, "attachment", "mime: duplicate parameter name"},
    443 	{"attachment; filename=foo[1](2).html", "attachment", "mime: invalid media parameter"},
    444 	{"attachment; filename=foo-.html", "attachment", "mime: invalid media parameter"},
    445 	{"attachment; filename=foo-.html", "attachment", "mime: invalid media parameter"},
    446 	{`attachment; filename *=UTF-8''foo-%c3%a4.html`, "attachment", "mime: invalid media parameter"},
    447 }
    448 
    449 func TestParseMediaTypeBogus(t *testing.T) {
    450 	for _, tt := range badMediaTypeTests {
    451 		mt, params, err := ParseMediaType(tt.in)
    452 		if err == nil {
    453 			t.Errorf("ParseMediaType(%q) = nil error; want parse error", tt.in)
    454 			continue
    455 		}
    456 		if err.Error() != tt.err {
    457 			t.Errorf("ParseMediaType(%q) = err %q; want %q", tt.in, err.Error(), tt.err)
    458 		}
    459 		if params != nil {
    460 			t.Errorf("ParseMediaType(%q): got non-nil params on error", tt.in)
    461 		}
    462 		if err != ErrInvalidMediaParameter && mt != "" {
    463 			t.Errorf("ParseMediaType(%q): got unexpected non-empty media type string", tt.in)
    464 		}
    465 		if err == ErrInvalidMediaParameter && mt != tt.mt {
    466 			t.Errorf("ParseMediaType(%q): in case of invalid parameters: expected type %q, got %q", tt.in, tt.mt, mt)
    467 		}
    468 	}
    469 }
    470 
    471 type formatTest struct {
    472 	typ    string
    473 	params map[string]string
    474 	want   string
    475 }
    476 
    477 var formatTests = []formatTest{
    478 	{"noslash", map[string]string{"X": "Y"}, "noslash; x=Y"}, // e.g. Content-Disposition values (RFC 2183); issue 11289
    479 	{"foo bar/baz", nil, ""},
    480 	{"foo/bar baz", nil, ""},
    481 	{"foo/BAR", nil, "foo/bar"},
    482 	{"foo/BAR", map[string]string{"X": "Y"}, "foo/bar; x=Y"},
    483 	{"foo/BAR", map[string]string{"space": "With space"}, `foo/bar; space="With space"`},
    484 	{"foo/BAR", map[string]string{"quote": `With "quote`}, `foo/bar; quote="With \"quote"`},
    485 	{"foo/BAR", map[string]string{"bslash": `With \backslash`}, `foo/bar; bslash="With \\backslash"`},
    486 	{"foo/BAR", map[string]string{"both": `With \backslash and "quote`}, `foo/bar; both="With \\backslash and \"quote"`},
    487 	{"foo/BAR", map[string]string{"": "empty attribute"}, ""},
    488 	{"foo/BAR", map[string]string{"bad attribute": "baz"}, ""},
    489 	{"foo/BAR", map[string]string{"nonascii": "not an ascii character: "}, ""},
    490 	{"foo/bar", map[string]string{"a": "av", "b": "bv", "c": "cv"}, "foo/bar; a=av; b=bv; c=cv"},
    491 	{"foo/bar", map[string]string{"0": "'", "9": "'"}, "foo/bar; 0='; 9='"},
    492 	{"foo", map[string]string{"bar": ""}, `foo; bar=""`},
    493 }
    494 
    495 func TestFormatMediaType(t *testing.T) {
    496 	for i, tt := range formatTests {
    497 		got := FormatMediaType(tt.typ, tt.params)
    498 		if got != tt.want {
    499 			t.Errorf("%d. FormatMediaType(%q, %v) = %q; want %q", i, tt.typ, tt.params, got, tt.want)
    500 		}
    501 	}
    502 }
    503