1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/basictypes.h" 6 #include "net/base/mime_sniffer.h" 7 #include "testing/gtest/include/gtest/gtest.h" 8 #include "url/gurl.h" 9 10 namespace net { 11 12 struct SnifferTest { 13 const char* content; 14 size_t content_len; 15 std::string url; 16 std::string type_hint; 17 const char* mime_type; 18 }; 19 20 static void TestArray(SnifferTest* tests, size_t count) { 21 std::string mime_type; 22 23 for (size_t i = 0; i < count; ++i) { 24 SniffMimeType(tests[i].content, 25 tests[i].content_len, 26 GURL(tests[i].url), 27 tests[i].type_hint, 28 &mime_type); 29 EXPECT_EQ(tests[i].mime_type, mime_type); 30 } 31 } 32 33 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray, 34 // so the error messages produced by test failures are more useful. 35 static std::string SniffMimeType(const std::string& content, 36 const std::string& url, 37 const std::string& mime_type_hint) { 38 std::string mime_type; 39 SniffMimeType(content.data(), content.size(), GURL(url), 40 mime_type_hint, &mime_type); 41 return mime_type; 42 } 43 44 TEST(MimeSnifferTest, BoundaryConditionsTest) { 45 std::string mime_type; 46 std::string type_hint; 47 48 char buf[] = { 49 'd', '\x1f', '\xFF' 50 }; 51 52 GURL url; 53 54 SniffMimeType(buf, 0, url, type_hint, &mime_type); 55 EXPECT_EQ("text/plain", mime_type); 56 SniffMimeType(buf, 1, url, type_hint, &mime_type); 57 EXPECT_EQ("text/plain", mime_type); 58 SniffMimeType(buf, 2, url, type_hint, &mime_type); 59 EXPECT_EQ("application/octet-stream", mime_type); 60 } 61 62 TEST(MimeSnifferTest, BasicSniffingTest) { 63 SnifferTest tests[] = { 64 { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1, 65 "http://www.example.com/", 66 "", "text/html" }, 67 { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1, 68 "http://www.example.com/foo.gif", 69 "application/octet-stream", "application/octet-stream" }, 70 { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1, 71 "http://www.example.com/foo", 72 "text/plain", "image/gif" }, 73 { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1, 74 "http://www.example.com/foo?param=tt.gif", 75 "", "application/octet-stream" }, 76 { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1, 77 "http://www.example.com/foo", 78 "text/plain", "text/plain" }, 79 { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1, 80 "http://www.example.com/foo", 81 "application/octet-stream", "application/octet-stream" }, 82 { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1, 83 "http://www.example.com/foo", 84 "", "image/jpeg" }, 85 }; 86 87 TestArray(tests, arraysize(tests)); 88 } 89 90 TEST(MimeSnifferTest, ChromeExtensionsTest) { 91 SnifferTest tests[] = { 92 // schemes 93 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 94 "http://www.example.com/foo.crx", 95 "", "application/x-chrome-extension" }, 96 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 97 "https://www.example.com/foo.crx", 98 "", "application/x-chrome-extension" }, 99 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 100 "ftp://www.example.com/foo.crx", 101 "", "application/x-chrome-extension" }, 102 103 // some other mimetypes that should get converted 104 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 105 "http://www.example.com/foo.crx", 106 "text/plain", "application/x-chrome-extension" }, 107 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 108 "http://www.example.com/foo.crx", 109 "application/octet-stream", "application/x-chrome-extension" }, 110 111 // success edge cases 112 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 113 "http://www.example.com/foo.crx?query=string", 114 "", "application/x-chrome-extension" }, 115 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 116 "http://www.example.com/foo..crx", 117 "", "application/x-chrome-extension" }, 118 119 // wrong file extension 120 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 121 "http://www.example.com/foo.bin", 122 "", "application/octet-stream" }, 123 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 124 "http://www.example.com/foo.bin?monkey", 125 "", "application/octet-stream" }, 126 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 127 "invalid-url", 128 "", "application/octet-stream" }, 129 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 130 "http://www.example.com", 131 "", "application/octet-stream" }, 132 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 133 "http://www.example.com/", 134 "", "application/octet-stream" }, 135 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 136 "http://www.example.com/foo", 137 "", "application/octet-stream" }, 138 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 139 "http://www.example.com/foocrx", 140 "", "application/octet-stream" }, 141 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 142 "http://www.example.com/foo.crx.blech", 143 "", "application/octet-stream" }, 144 145 // wrong magic 146 { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1, 147 "http://www.example.com/foo.crx?monkey", 148 "", "application/octet-stream" }, 149 { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1, 150 "http://www.example.com/foo.crx?monkey", 151 "", "application/octet-stream" }, 152 }; 153 154 TestArray(tests, arraysize(tests)); 155 } 156 157 TEST(MimeSnifferTest, MozillaCompatibleTest) { 158 SnifferTest tests[] = { 159 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1, 160 "http://www.example.com/", 161 "", "text/html" }, 162 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1, 163 "http://www.example.com/", 164 "text/plain", "text/plain" }, 165 { "BMjlakdsfk", sizeof("BMjlakdsfk")-1, 166 "http://www.example.com/foo", 167 "", "image/bmp" }, 168 { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1, 169 "http://www.example.com/favicon.ico", 170 "", "application/octet-stream" }, 171 { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1, 172 "http://www.example.com/foo", 173 "", "text/plain" }, 174 { "From: Fred\nTo: Bob\n\nHi\n.\n", 175 sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1, 176 "http://www.example.com/foo", 177 "", "text/plain" }, 178 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 179 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1, 180 "http://www.example.com/foo", 181 "", "text/xml" }, 182 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 183 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1, 184 "http://www.example.com/foo", 185 "application/octet-stream", "application/octet-stream" }, 186 }; 187 188 TestArray(tests, arraysize(tests)); 189 } 190 191 TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) { 192 SnifferTest tests[] = { 193 { "GIF87a\n<html>\n<body>" 194 "<script>alert('haxorzed');\n</script>" 195 "</body></html>\n", 196 sizeof("GIF87a\n<html>\n<body>" 197 "<script>alert('haxorzed');\n</script>" 198 "</body></html>\n")-1, 199 "http://www.example.com/foo", 200 "", "image/gif" }, 201 { "GIF87a\n<html>\n<body>" 202 "<script>alert('haxorzed');\n</script>" 203 "</body></html>\n", 204 sizeof("GIF87a\n<html>\n<body>" 205 "<script>alert('haxorzed');\n</script>" 206 "</body></html>\n")-1, 207 "http://www.example.com/foo?q=ttt.html", 208 "", "image/gif" }, 209 { "GIF87a\n<html>\n<body>" 210 "<script>alert('haxorzed');\n</script>" 211 "</body></html>\n", 212 sizeof("GIF87a\n<html>\n<body>" 213 "<script>alert('haxorzed');\n</script>" 214 "</body></html>\n")-1, 215 "http://www.example.com/foo#ttt.html", 216 "", "image/gif" }, 217 { "a\n<html>\n<body>" 218 "<script>alert('haxorzed');\n</script>" 219 "</body></html>\n", 220 sizeof("a\n<html>\n<body>" 221 "<script>alert('haxorzed');\n</script>" 222 "</body></html>\n")-1, 223 "http://www.example.com/foo", 224 "", "text/plain" }, 225 { "a\n<html>\n<body>" 226 "<script>alert('haxorzed');\n</script>" 227 "</body></html>\n", 228 sizeof("a\n<html>\n<body>" 229 "<script>alert('haxorzed');\n</script>" 230 "</body></html>\n")-1, 231 "http://www.example.com/foo?q=ttt.html", 232 "", "text/plain" }, 233 { "a\n<html>\n<body>" 234 "<script>alert('haxorzed');\n</script>" 235 "</body></html>\n", 236 sizeof("a\n<html>\n<body>" 237 "<script>alert('haxorzed');\n</script>" 238 "</body></html>\n")-1, 239 "http://www.example.com/foo#ttt.html", 240 "", "text/plain" }, 241 { "a\n<html>\n<body>" 242 "<script>alert('haxorzed');\n</script>" 243 "</body></html>\n", 244 sizeof("a\n<html>\n<body>" 245 "<script>alert('haxorzed');\n</script>" 246 "</body></html>\n")-1, 247 "http://www.example.com/foo.html", 248 "", "text/plain" }, 249 }; 250 251 TestArray(tests, arraysize(tests)); 252 } 253 254 TEST(MimeSnifferTest, UnicodeTest) { 255 SnifferTest tests[] = { 256 { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1, 257 "http://www.example.com/foo", 258 "", "text/plain" }, 259 { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79", 260 sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1, 261 "http://www.example.com/foo", 262 "", "text/plain" }, 263 { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9", 264 sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1, 265 "http://www.example.com/foo", 266 "", "text/plain" }, 267 { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01", 268 sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1, 269 "http://www.example.com/foo", 270 "", "text/plain" }, 271 }; 272 273 TestArray(tests, arraysize(tests)); 274 } 275 276 TEST(MimeSnifferTest, FlashTest) { 277 SnifferTest tests[] = { 278 { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1, 279 "http://www.example.com/foo", 280 "", "application/octet-stream" }, 281 { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1, 282 "http://www.example.com/foo?q=ttt.swf", 283 "", "application/octet-stream" }, 284 { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1, 285 "http://www.example.com/foo#ttt.swf", 286 "", "application/octet-stream" }, 287 { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1, 288 "http://www.example.com/foo.swf", 289 "", "text/plain" }, 290 { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1, 291 "http://www.example.com/foo/bar.swf", 292 "", "application/octet-stream" }, 293 { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1, 294 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar", 295 "", "application/octet-stream" }, 296 { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1, 297 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar", 298 "text/plain", "application/octet-stream" }, 299 }; 300 301 TestArray(tests, arraysize(tests)); 302 } 303 304 TEST(MimeSnifferTest, XMLTest) { 305 // An easy feed to identify. 306 EXPECT_EQ("application/atom+xml", 307 SniffMimeType("<?xml?><feed", std::string(), "text/xml")); 308 // Don't sniff out of plain text. 309 EXPECT_EQ("text/plain", 310 SniffMimeType("<?xml?><feed", std::string(), "text/plain")); 311 // Simple RSS. 312 EXPECT_EQ("application/rss+xml", 313 SniffMimeType( 314 "<?xml version='1.0'?>\r\n<rss", std::string(), "text/xml")); 315 316 // The top of CNN's RSS feed, which we'd like to recognize as RSS. 317 static const char kCNNRSS[] = 318 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" 319 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" " 320 "type=\"text/xsl\" media=\"screen\"?>" 321 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" " 322 "type=\"text/css\" media=\"screen\"?>" 323 "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" " 324 "version=\"2.0\">"; 325 // CNN's RSS 326 EXPECT_EQ("application/rss+xml", 327 SniffMimeType(kCNNRSS, std::string(), "text/xml")); 328 EXPECT_EQ("text/plain", SniffMimeType(kCNNRSS, std::string(), "text/plain")); 329 330 // Don't sniff random XML as something different. 331 EXPECT_EQ("text/xml", 332 SniffMimeType("<?xml?><notafeed", std::string(), "text/xml")); 333 // Don't sniff random plain-text as something different. 334 EXPECT_EQ("text/plain", 335 SniffMimeType("<?xml?><notafeed", std::string(), "text/plain")); 336 337 // Positive test for the two instances we upgrade to XHTML. 338 EXPECT_EQ("application/xhtml+xml", 339 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 340 std::string(), 341 "text/xml")); 342 EXPECT_EQ("application/xhtml+xml", 343 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 344 std::string(), 345 "application/xml")); 346 347 // Following our behavior with HTML, don't call other mime types XHTML. 348 EXPECT_EQ("text/plain", 349 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 350 std::string(), 351 "text/plain")); 352 EXPECT_EQ("application/rss+xml", 353 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 354 std::string(), 355 "application/rss+xml")); 356 357 // Don't sniff other HTML-looking bits as HTML. 358 EXPECT_EQ("text/xml", 359 SniffMimeType("<html><head>", std::string(), "text/xml")); 360 EXPECT_EQ("text/xml", 361 SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">", 362 std::string(), 363 "text/xml")); 364 } 365 366 // Test content which is >= 1024 bytes, and includes no open angle bracket. 367 // http://code.google.com/p/chromium/issues/detail?id=3521 368 TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) { 369 // Make a large input, with 1024 bytes of "x". 370 std::string content; 371 content.resize(1024); 372 std::fill(content.begin(), content.end(), 'x'); 373 374 // content.size() >= 1024 so the sniff is unambiguous. 375 std::string mime_type; 376 EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(), 377 "text/xml", &mime_type)); 378 EXPECT_EQ("text/xml", mime_type); 379 } 380 381 // Test content which is >= 1024 bytes, and includes a binary looking byte. 382 // http://code.google.com/p/chromium/issues/detail?id=15314 383 TEST(MimeSnifferTest, LooksBinary) { 384 // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01. 385 std::string content; 386 content.resize(1024); 387 std::fill(content.begin(), content.end(), 'x'); 388 content[1000] = 0x01; 389 390 // content.size() >= 1024 so the sniff is unambiguous. 391 std::string mime_type; 392 EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(), 393 "text/plain", &mime_type)); 394 EXPECT_EQ("application/octet-stream", mime_type); 395 } 396 397 TEST(MimeSnifferTest, OfficeTest) { 398 SnifferTest tests[] = { 399 // Check for URLs incorrectly reported as Microsoft Office files. 400 { "Hi there", 401 sizeof("Hi there")-1, 402 "http://www.example.com/foo.doc", 403 "application/msword", "application/octet-stream" }, 404 { "Hi there", 405 sizeof("Hi there")-1, 406 "http://www.example.com/foo.xls", 407 "application/vnd.ms-excel", "application/octet-stream" }, 408 { "Hi there", 409 sizeof("Hi there")-1, 410 "http://www.example.com/foo.ppt", 411 "application/vnd.ms-powerpoint", "application/octet-stream" }, 412 // Check for Microsoft Office files incorrectly reported as text. 413 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there", 414 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1, 415 "http://www.example.com/foo.doc", 416 "text/plain", "application/msword" }, 417 { "PK\x03\x04" "Hi there", 418 sizeof("PK\x03\x04" "Hi there")-1, 419 "http://www.example.com/foo.doc", 420 "text/plain", 421 "application/vnd.openxmlformats-officedocument." 422 "wordprocessingml.document" }, 423 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there", 424 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1, 425 "http://www.example.com/foo.xls", 426 "text/plain", "application/vnd.ms-excel" }, 427 { "PK\x03\x04" "Hi there", 428 sizeof("PK\x03\x04" "Hi there")-1, 429 "http://www.example.com/foo.xls", 430 "text/plain", 431 "application/vnd.openxmlformats-officedocument." 432 "spreadsheetml.sheet" }, 433 { "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there", 434 sizeof("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" "Hi there")-1, 435 "http://www.example.com/foo.ppt", 436 "text/plain", "application/vnd.ms-powerpoint" }, 437 { "PK\x03\x04" "Hi there", 438 sizeof("PK\x03\x04" "Hi there")-1, 439 "http://www.example.com/foo.ppt", 440 "text/plain", 441 "application/vnd.openxmlformats-officedocument." 442 "presentationml.presentation" }, 443 }; 444 445 TestArray(tests, arraysize(tests)); 446 } 447 448 // TODO(thestig) Add more tests for other AV formats. Add another test case for 449 // RAW images. 450 TEST(MimeSnifferTest, AudioVideoTest) { 451 std::string mime_type; 452 const char kFlacTestData[] = 453 "fLaC\x00\x00\x00\x22\x12\x00\x12\x00\x00\x00\x00\x00"; 454 EXPECT_TRUE(SniffMimeTypeFromLocalData(kFlacTestData, 455 sizeof(kFlacTestData), 456 &mime_type)); 457 EXPECT_EQ("audio/x-flac", mime_type); 458 mime_type.clear(); 459 460 const char kWMATestData[] = 461 "\x30\x26\xb2\x75\x8e\x66\xcf\x11\xa6\xd9\x00\xaa\x00\x62\xce\x6c"; 462 EXPECT_TRUE(SniffMimeTypeFromLocalData(kWMATestData, 463 sizeof(kWMATestData), 464 &mime_type)); 465 EXPECT_EQ("video/x-ms-asf", mime_type); 466 mime_type.clear(); 467 468 // mp4a, m4b, m4p, and alac extension files which share the same container 469 // format. 470 const char kMP4TestData[] = 471 "\x00\x00\x00\x20\x66\x74\x79\x70\x4d\x34\x41\x20\x00\x00\x00\x00"; 472 EXPECT_TRUE(SniffMimeTypeFromLocalData(kMP4TestData, 473 sizeof(kMP4TestData), 474 &mime_type)); 475 EXPECT_EQ("video/mp4", mime_type); 476 mime_type.clear(); 477 478 const char kAACTestData[] = 479 "\xff\xf1\x50\x80\x02\x20\xb0\x23\x0a\x83\x20\x7d\x61\x90\x3e\xb1"; 480 EXPECT_TRUE(SniffMimeTypeFromLocalData(kAACTestData, 481 sizeof(kAACTestData), 482 &mime_type)); 483 EXPECT_EQ("audio/mpeg", mime_type); 484 mime_type.clear(); 485 } 486 487 } // namespace net 488