1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/basictypes.h" 6 #include "googleurl/src/gurl.h" 7 #include "net/base/mime_sniffer.h" 8 #include "testing/gtest/include/gtest/gtest.h" 9 10 namespace net { 11 12 struct SnifferTest { 13 const char* content; 14 size_t content_len; 15 std::string url; 16 std::string type_hint; 17 const char* mime_type; 18 }; 19 20 static void TestArray(SnifferTest* tests, size_t count) { 21 std::string mime_type; 22 23 for (size_t i = 0; i < count; ++i) { 24 SniffMimeType(tests[i].content, 25 tests[i].content_len, 26 GURL(tests[i].url), 27 tests[i].type_hint, 28 &mime_type); 29 EXPECT_EQ(tests[i].mime_type, mime_type); 30 } 31 } 32 33 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray, 34 // so the error messages produced by test failures are more useful. 35 static std::string SniffMimeType(const std::string& content, 36 const std::string& url, 37 const std::string& mime_type_hint) { 38 std::string mime_type; 39 SniffMimeType(content.data(), content.size(), GURL(url), 40 mime_type_hint, &mime_type); 41 return mime_type; 42 } 43 44 TEST(MimeSnifferTest, BoundaryConditionsTest) { 45 std::string mime_type; 46 std::string type_hint; 47 48 char buf[] = { 49 'd', '\x1f', '\xFF' 50 }; 51 52 GURL url; 53 54 SniffMimeType(buf, 0, url, type_hint, &mime_type); 55 EXPECT_EQ("text/plain", mime_type); 56 SniffMimeType(buf, 1, url, type_hint, &mime_type); 57 EXPECT_EQ("text/plain", mime_type); 58 SniffMimeType(buf, 2, url, type_hint, &mime_type); 59 EXPECT_EQ("application/octet-stream", mime_type); 60 } 61 62 TEST(MimeSnifferTest, BasicSniffingTest) { 63 SnifferTest tests[] = { 64 { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1, 65 "http://www.example.com/", 66 "", "text/html" }, 67 { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1, 68 "http://www.example.com/foo.gif", 69 "application/octet-stream", "application/octet-stream" }, 70 { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1, 71 "http://www.example.com/foo", 72 "text/plain", "image/gif" }, 73 { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1, 74 "http://www.example.com/foo?param=tt.gif", 75 "", "application/octet-stream" }, 76 { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1, 77 "http://www.example.com/foo", 78 "text/plain", "text/plain" }, 79 { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1, 80 "http://www.example.com/foo", 81 "application/octet-stream", "application/octet-stream" }, 82 { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1, 83 "http://www.example.com/foo", 84 "", "image/jpeg" }, 85 }; 86 87 TestArray(tests, arraysize(tests)); 88 } 89 90 TEST(MimeSnifferTest, ChromeExtensionsTest) { 91 SnifferTest tests[] = { 92 // schemes 93 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 94 "http://www.example.com/foo.crx", 95 "", "application/x-chrome-extension" }, 96 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 97 "https://www.example.com/foo.crx", 98 "", "application/x-chrome-extension" }, 99 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 100 "ftp://www.example.com/foo.crx", 101 "", "application/x-chrome-extension" }, 102 103 // some other mimetypes that should get converted 104 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 105 "http://www.example.com/foo.crx", 106 "text/plain", "application/x-chrome-extension" }, 107 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 108 "http://www.example.com/foo.crx", 109 "application/octet-stream", "application/x-chrome-extension" }, 110 111 // success edge cases 112 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 113 "http://www.example.com/foo.crx?query=string", 114 "", "application/x-chrome-extension" }, 115 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 116 "http://www.example.com/foo..crx", 117 "", "application/x-chrome-extension" }, 118 119 // wrong file extension 120 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 121 "http://www.example.com/foo.bin", 122 "", "application/octet-stream" }, 123 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 124 "http://www.example.com/foo.bin?monkey", 125 "", "application/octet-stream" }, 126 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 127 "invalid-url", 128 "", "application/octet-stream" }, 129 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 130 "http://www.example.com", 131 "", "application/octet-stream" }, 132 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 133 "http://www.example.com/", 134 "", "application/octet-stream" }, 135 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 136 "http://www.example.com/foo", 137 "", "application/octet-stream" }, 138 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 139 "http://www.example.com/foocrx", 140 "", "application/octet-stream" }, 141 { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1, 142 "http://www.example.com/foo.crx.blech", 143 "", "application/octet-stream" }, 144 145 // wrong magic 146 { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1, 147 "http://www.example.com/foo.crx?monkey", 148 "", "application/octet-stream" }, 149 { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1, 150 "http://www.example.com/foo.crx?monkey", 151 "", "application/octet-stream" }, 152 }; 153 154 TestArray(tests, arraysize(tests)); 155 } 156 157 TEST(MimeSnifferTest, MozillaCompatibleTest) { 158 SnifferTest tests[] = { 159 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1, 160 "http://www.example.com/", 161 "", "text/html" }, 162 { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1, 163 "http://www.example.com/", 164 "text/plain", "text/plain" }, 165 { "BMjlakdsfk", sizeof("BMjlakdsfk")-1, 166 "http://www.example.com/foo", 167 "", "image/bmp" }, 168 { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1, 169 "http://www.example.com/favicon.ico", 170 "", "application/octet-stream" }, 171 { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1, 172 "http://www.example.com/foo", 173 "", "text/plain" }, 174 { "From: Fred\nTo: Bob\n\nHi\n.\n", 175 sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1, 176 "http://www.example.com/foo", 177 "", "text/plain" }, 178 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 179 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1, 180 "http://www.example.com/foo", 181 "", "text/xml" }, 182 { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 183 sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1, 184 "http://www.example.com/foo", 185 "application/octet-stream", "application/octet-stream" }, 186 }; 187 188 TestArray(tests, arraysize(tests)); 189 } 190 191 TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) { 192 SnifferTest tests[] = { 193 { "GIF87a\n<html>\n<body>" 194 "<script>alert('haxorzed');\n</script>" 195 "</body></html>\n", 196 sizeof("GIF87a\n<html>\n<body>" 197 "<script>alert('haxorzed');\n</script>" 198 "</body></html>\n")-1, 199 "http://www.example.com/foo", 200 "", "image/gif" }, 201 { "GIF87a\n<html>\n<body>" 202 "<script>alert('haxorzed');\n</script>" 203 "</body></html>\n", 204 sizeof("GIF87a\n<html>\n<body>" 205 "<script>alert('haxorzed');\n</script>" 206 "</body></html>\n")-1, 207 "http://www.example.com/foo?q=ttt.html", 208 "", "image/gif" }, 209 { "GIF87a\n<html>\n<body>" 210 "<script>alert('haxorzed');\n</script>" 211 "</body></html>\n", 212 sizeof("GIF87a\n<html>\n<body>" 213 "<script>alert('haxorzed');\n</script>" 214 "</body></html>\n")-1, 215 "http://www.example.com/foo#ttt.html", 216 "", "image/gif" }, 217 { "a\n<html>\n<body>" 218 "<script>alert('haxorzed');\n</script>" 219 "</body></html>\n", 220 sizeof("a\n<html>\n<body>" 221 "<script>alert('haxorzed');\n</script>" 222 "</body></html>\n")-1, 223 "http://www.example.com/foo", 224 "", "text/plain" }, 225 { "a\n<html>\n<body>" 226 "<script>alert('haxorzed');\n</script>" 227 "</body></html>\n", 228 sizeof("a\n<html>\n<body>" 229 "<script>alert('haxorzed');\n</script>" 230 "</body></html>\n")-1, 231 "http://www.example.com/foo?q=ttt.html", 232 "", "text/plain" }, 233 { "a\n<html>\n<body>" 234 "<script>alert('haxorzed');\n</script>" 235 "</body></html>\n", 236 sizeof("a\n<html>\n<body>" 237 "<script>alert('haxorzed');\n</script>" 238 "</body></html>\n")-1, 239 "http://www.example.com/foo#ttt.html", 240 "", "text/plain" }, 241 { "a\n<html>\n<body>" 242 "<script>alert('haxorzed');\n</script>" 243 "</body></html>\n", 244 sizeof("a\n<html>\n<body>" 245 "<script>alert('haxorzed');\n</script>" 246 "</body></html>\n")-1, 247 "http://www.example.com/foo.html", 248 "", "text/plain" }, 249 }; 250 251 TestArray(tests, arraysize(tests)); 252 } 253 254 TEST(MimeSnifferTest, UnicodeTest) { 255 SnifferTest tests[] = { 256 { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1, 257 "http://www.example.com/foo", 258 "", "text/plain" }, 259 { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79", 260 sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1, 261 "http://www.example.com/foo", 262 "", "text/plain" }, 263 { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9", 264 sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1, 265 "http://www.example.com/foo", 266 "", "text/plain" }, 267 { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01", 268 sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1, 269 "http://www.example.com/foo", 270 "", "text/plain" }, 271 }; 272 273 TestArray(tests, arraysize(tests)); 274 } 275 276 TEST(MimeSnifferTest, FlashTest) { 277 SnifferTest tests[] = { 278 { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1, 279 "http://www.example.com/foo", 280 "", "application/octet-stream" }, 281 { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1, 282 "http://www.example.com/foo?q=ttt.swf", 283 "", "application/octet-stream" }, 284 { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1, 285 "http://www.example.com/foo#ttt.swf", 286 "", "application/octet-stream" }, 287 { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1, 288 "http://www.example.com/foo.swf", 289 "", "text/plain" }, 290 { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1, 291 "http://www.example.com/foo/bar.swf", 292 "", "application/octet-stream" }, 293 { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1, 294 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar", 295 "", "application/octet-stream" }, 296 { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1, 297 "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar", 298 "text/plain", "application/octet-stream" }, 299 }; 300 301 TestArray(tests, arraysize(tests)); 302 } 303 304 TEST(MimeSnifferTest, XMLTest) { 305 // An easy feed to identify. 306 EXPECT_EQ("application/atom+xml", 307 SniffMimeType("<?xml?><feed", "", "text/xml")); 308 // Don't sniff out of plain text. 309 EXPECT_EQ("text/plain", 310 SniffMimeType("<?xml?><feed", "", "text/plain")); 311 // Simple RSS. 312 EXPECT_EQ("application/rss+xml", 313 SniffMimeType("<?xml version='1.0'?>\r\n<rss", "", "text/xml")); 314 315 // The top of CNN's RSS feed, which we'd like to recognize as RSS. 316 static const char kCNNRSS[] = 317 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" 318 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" " 319 "type=\"text/xsl\" media=\"screen\"?>" 320 "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" " 321 "type=\"text/css\" media=\"screen\"?>" 322 "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" " 323 "version=\"2.0\">"; 324 // CNN's RSS 325 EXPECT_EQ("application/rss+xml", 326 SniffMimeType(kCNNRSS, "", "text/xml")); 327 EXPECT_EQ("text/plain", 328 SniffMimeType(kCNNRSS, "", "text/plain")); 329 330 // Don't sniff random XML as something different. 331 EXPECT_EQ("text/xml", 332 SniffMimeType("<?xml?><notafeed", "", "text/xml")); 333 // Don't sniff random plain-text as something different. 334 EXPECT_EQ("text/plain", 335 SniffMimeType("<?xml?><notafeed", "", "text/plain")); 336 337 // Positive test for the two instances we upgrade to XHTML. 338 EXPECT_EQ("application/xhtml+xml", 339 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 340 "", "text/xml")); 341 EXPECT_EQ("application/xhtml+xml", 342 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 343 "", "application/xml")); 344 345 // Following our behavior with HTML, don't call other mime types XHTML. 346 EXPECT_EQ("text/plain", 347 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 348 "", "text/plain")); 349 EXPECT_EQ("application/rss+xml", 350 SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">", 351 "", "application/rss+xml")); 352 353 // Don't sniff other HTML-looking bits as HTML. 354 EXPECT_EQ("text/xml", 355 SniffMimeType("<html><head>", "", "text/xml")); 356 EXPECT_EQ("text/xml", 357 SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">", 358 "", "text/xml")); 359 360 } 361 362 // Test content which is >= 1024 bytes, and includes no open angle bracket. 363 // http://code.google.com/p/chromium/issues/detail?id=3521 364 TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) { 365 // Make a large input, with 1024 bytes of "x". 366 std::string content; 367 content.resize(1024); 368 std::fill(content.begin(), content.end(), 'x'); 369 370 // content.size() >= 1024 so the sniff is unambiguous. 371 std::string mime_type; 372 EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(), 373 "text/xml", &mime_type)); 374 EXPECT_EQ("text/xml", mime_type); 375 } 376 377 // Test content which is >= 1024 bytes, and includes a binary looking byte. 378 // http://code.google.com/p/chromium/issues/detail?id=15314 379 TEST(MimeSnifferTest, LooksBinary) { 380 // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01. 381 std::string content; 382 content.resize(1024); 383 std::fill(content.begin(), content.end(), 'x'); 384 content[1000] = 0x01; 385 386 // content.size() >= 1024 so the sniff is unambiguous. 387 std::string mime_type; 388 EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(), 389 "text/plain", &mime_type)); 390 EXPECT_EQ("application/octet-stream", mime_type); 391 } 392 393 } // namespace net 394