Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/basictypes.h"
      6 #include "googleurl/src/gurl.h"
      7 #include "net/base/mime_sniffer.h"
      8 #include "testing/gtest/include/gtest/gtest.h"
      9 
     10 namespace net {
     11 
// One table-driven sniffing case: the inputs TestArray() hands to
// SniffMimeType() plus the MIME type the sniff is expected to produce.
struct SnifferTest {
  const char* content;    // Bytes to sniff; tables here embed NUL bytes.
  size_t content_len;     // Length of |content| in bytes.
  std::string url;        // URL spec; wrapped in a GURL before sniffing.
  std::string type_hint;  // MIME type hint passed along with the content.
  const char* mime_type;  // Expected sniffed MIME type.
};
     19 
     20 static void TestArray(SnifferTest* tests, size_t count) {
     21   std::string mime_type;
     22 
     23   for (size_t i = 0; i < count; ++i) {
     24     SniffMimeType(tests[i].content,
     25                        tests[i].content_len,
     26                        GURL(tests[i].url),
     27                        tests[i].type_hint,
     28                        &mime_type);
     29     EXPECT_EQ(tests[i].mime_type, mime_type);
     30   }
     31 }
     32 
     33 // TODO(evanm): convert other tests to use SniffMimeType instead of TestArray,
     34 // so the error messages produced by test failures are more useful.
     35 static std::string SniffMimeType(const std::string& content,
     36                                  const std::string& url,
     37                                  const std::string& mime_type_hint) {
     38   std::string mime_type;
     39   SniffMimeType(content.data(), content.size(), GURL(url),
     40                      mime_type_hint, &mime_type);
     41   return mime_type;
     42 }
     43 
     44 TEST(MimeSnifferTest, BoundaryConditionsTest) {
     45   std::string mime_type;
     46   std::string type_hint;
     47 
     48   char buf[] = {
     49     'd', '\x1f', '\xFF'
     50   };
     51 
     52   GURL url;
     53 
     54   SniffMimeType(buf, 0, url, type_hint, &mime_type);
     55   EXPECT_EQ("text/plain", mime_type);
     56   SniffMimeType(buf, 1, url, type_hint, &mime_type);
     57   EXPECT_EQ("text/plain", mime_type);
     58   SniffMimeType(buf, 2, url, type_hint, &mime_type);
     59   EXPECT_EQ("application/octet-stream", mime_type);
     60 }
     61 
     62 TEST(MimeSnifferTest, BasicSniffingTest) {
     63   SnifferTest tests[] = {
     64     { "<!DOCTYPE html PUBLIC", sizeof("<!DOCTYPE html PUBLIC")-1,
     65       "http://www.example.com/",
     66       "", "text/html" },
     67     { "<HtMl><Body></body></htMl>", sizeof("<HtMl><Body></body></htMl>")-1,
     68       "http://www.example.com/foo.gif",
     69       "application/octet-stream", "application/octet-stream" },
     70     { "GIF89a\x1F\x83\x94", sizeof("GIF89a\xAF\x83\x94")-1,
     71       "http://www.example.com/foo",
     72       "text/plain", "image/gif" },
     73     { "Gif87a\x1F\x83\x94", sizeof("Gif87a\xAF\x83\x94")-1,
     74       "http://www.example.com/foo?param=tt.gif",
     75       "", "application/octet-stream" },
     76     { "%!PS-Adobe-3.0", sizeof("%!PS-Adobe-3.0")-1,
     77       "http://www.example.com/foo",
     78       "text/plain", "text/plain" },
     79     { "\x89" "PNG\x0D\x0A\x1A\x0A", sizeof("\x89" "PNG\x0D\x0A\x1A\x0A")-1,
     80       "http://www.example.com/foo",
     81       "application/octet-stream", "application/octet-stream" },
     82     { "\xFF\xD8\xFF\x23\x49\xAF", sizeof("\xFF\xD8\xFF\x23\x49\xAF")-1,
     83       "http://www.example.com/foo",
     84       "", "image/jpeg" },
     85   };
     86 
     87   TestArray(tests, arraysize(tests));
     88 }
     89 
     90 TEST(MimeSnifferTest, ChromeExtensionsTest) {
     91   SnifferTest tests[] = {
     92     // schemes
     93     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
     94       "http://www.example.com/foo.crx",
     95       "", "application/x-chrome-extension" },
     96     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
     97       "https://www.example.com/foo.crx",
     98       "", "application/x-chrome-extension" },
     99     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    100       "ftp://www.example.com/foo.crx",
    101       "", "application/x-chrome-extension" },
    102 
    103     // some other mimetypes that should get converted
    104     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    105       "http://www.example.com/foo.crx",
    106       "text/plain", "application/x-chrome-extension" },
    107     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    108       "http://www.example.com/foo.crx",
    109       "application/octet-stream", "application/x-chrome-extension" },
    110 
    111     // success edge cases
    112     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    113       "http://www.example.com/foo.crx?query=string",
    114       "", "application/x-chrome-extension" },
    115     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    116       "http://www.example.com/foo..crx",
    117       "", "application/x-chrome-extension" },
    118 
    119     // wrong file extension
    120     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    121       "http://www.example.com/foo.bin",
    122       "", "application/octet-stream" },
    123     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    124       "http://www.example.com/foo.bin?monkey",
    125       "", "application/octet-stream" },
    126     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    127       "invalid-url",
    128       "", "application/octet-stream" },
    129     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    130       "http://www.example.com",
    131       "", "application/octet-stream" },
    132     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    133       "http://www.example.com/",
    134       "", "application/octet-stream" },
    135     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    136       "http://www.example.com/foo",
    137       "", "application/octet-stream" },
    138     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    139       "http://www.example.com/foocrx",
    140       "", "application/octet-stream" },
    141     { "Cr24\x02\x00\x00\x00", sizeof("Cr24\x02\x00\x00\x00")-1,
    142       "http://www.example.com/foo.crx.blech",
    143       "", "application/octet-stream" },
    144 
    145     // wrong magic
    146     { "Cr24\x02\x00\x00\x01", sizeof("Cr24\x02\x00\x00\x01")-1,
    147       "http://www.example.com/foo.crx?monkey",
    148       "", "application/octet-stream" },
    149     { "PADDING_Cr24\x02\x00\x00\x00", sizeof("PADDING_Cr24\x02\x00\x00\x00")-1,
    150       "http://www.example.com/foo.crx?monkey",
    151       "", "application/octet-stream" },
    152   };
    153 
    154   TestArray(tests, arraysize(tests));
    155 }
    156 
    157 TEST(MimeSnifferTest, MozillaCompatibleTest) {
    158   SnifferTest tests[] = {
    159     { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
    160       "http://www.example.com/",
    161       "", "text/html" },
    162     { " \n <hTmL>\n <hea", sizeof(" \n <hTmL>\n <hea")-1,
    163       "http://www.example.com/",
    164       "text/plain", "text/plain" },
    165     { "BMjlakdsfk", sizeof("BMjlakdsfk")-1,
    166       "http://www.example.com/foo",
    167       "", "image/bmp" },
    168     { "\x00\x00\x30\x00", sizeof("\x00\x00\x30\x00")-1,
    169       "http://www.example.com/favicon.ico",
    170       "", "application/octet-stream" },
    171     { "#!/bin/sh\nls /\n", sizeof("#!/bin/sh\nls /\n")-1,
    172       "http://www.example.com/foo",
    173       "", "text/plain" },
    174     { "From: Fred\nTo: Bob\n\nHi\n.\n",
    175       sizeof("From: Fred\nTo: Bob\n\nHi\n.\n")-1,
    176       "http://www.example.com/foo",
    177       "", "text/plain" },
    178     { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
    179       sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
    180       "http://www.example.com/foo",
    181       "", "text/xml" },
    182     { "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
    183       sizeof("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")-1,
    184       "http://www.example.com/foo",
    185       "application/octet-stream", "application/octet-stream" },
    186   };
    187 
    188   TestArray(tests, arraysize(tests));
    189 }
    190 
    191 TEST(MimeSnifferTest, DontAllowPrivilegeEscalationTest) {
    192   SnifferTest tests[] = {
    193     { "GIF87a\n<html>\n<body>"
    194         "<script>alert('haxorzed');\n</script>"
    195         "</body></html>\n",
    196       sizeof("GIF87a\n<html>\n<body>"
    197         "<script>alert('haxorzed');\n</script>"
    198         "</body></html>\n")-1,
    199       "http://www.example.com/foo",
    200       "", "image/gif" },
    201     { "GIF87a\n<html>\n<body>"
    202         "<script>alert('haxorzed');\n</script>"
    203         "</body></html>\n",
    204       sizeof("GIF87a\n<html>\n<body>"
    205         "<script>alert('haxorzed');\n</script>"
    206         "</body></html>\n")-1,
    207       "http://www.example.com/foo?q=ttt.html",
    208       "", "image/gif" },
    209     { "GIF87a\n<html>\n<body>"
    210         "<script>alert('haxorzed');\n</script>"
    211         "</body></html>\n",
    212       sizeof("GIF87a\n<html>\n<body>"
    213         "<script>alert('haxorzed');\n</script>"
    214         "</body></html>\n")-1,
    215       "http://www.example.com/foo#ttt.html",
    216       "", "image/gif" },
    217     { "a\n<html>\n<body>"
    218         "<script>alert('haxorzed');\n</script>"
    219         "</body></html>\n",
    220       sizeof("a\n<html>\n<body>"
    221         "<script>alert('haxorzed');\n</script>"
    222         "</body></html>\n")-1,
    223       "http://www.example.com/foo",
    224       "", "text/plain" },
    225     { "a\n<html>\n<body>"
    226         "<script>alert('haxorzed');\n</script>"
    227         "</body></html>\n",
    228       sizeof("a\n<html>\n<body>"
    229         "<script>alert('haxorzed');\n</script>"
    230         "</body></html>\n")-1,
    231       "http://www.example.com/foo?q=ttt.html",
    232       "", "text/plain" },
    233     { "a\n<html>\n<body>"
    234         "<script>alert('haxorzed');\n</script>"
    235         "</body></html>\n",
    236       sizeof("a\n<html>\n<body>"
    237         "<script>alert('haxorzed');\n</script>"
    238         "</body></html>\n")-1,
    239       "http://www.example.com/foo#ttt.html",
    240       "", "text/plain" },
    241     { "a\n<html>\n<body>"
    242         "<script>alert('haxorzed');\n</script>"
    243         "</body></html>\n",
    244       sizeof("a\n<html>\n<body>"
    245         "<script>alert('haxorzed');\n</script>"
    246         "</body></html>\n")-1,
    247       "http://www.example.com/foo.html",
    248       "", "text/plain" },
    249   };
    250 
    251   TestArray(tests, arraysize(tests));
    252 }
    253 
    254 TEST(MimeSnifferTest, UnicodeTest) {
    255   SnifferTest tests[] = {
    256     { "\xEF\xBB\xBF" "Hi there", sizeof("\xEF\xBB\xBF" "Hi there")-1,
    257       "http://www.example.com/foo",
    258       "", "text/plain" },
    259     { "\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79",
    260       sizeof("\xEF\xBB\xBF\xED\x7A\xAD\x7A\x0D\x79")-1,
    261       "http://www.example.com/foo",
    262       "", "text/plain" },
    263     { "\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9",
    264       sizeof("\xFE\xFF\xD0\xA5\xD0\xBE\xD0\xBB\xD1\x83\xD0\xB9")-1,
    265       "http://www.example.com/foo",
    266       "", "text/plain" },
    267     { "\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01",
    268       sizeof("\xFE\xFF\x00\x41\x00\x20\xD8\x00\xDC\x00\xD8\x00\xDC\x01")-1,
    269       "http://www.example.com/foo",
    270       "", "text/plain" },
    271   };
    272 
    273   TestArray(tests, arraysize(tests));
    274 }
    275 
    276 TEST(MimeSnifferTest, FlashTest) {
    277   SnifferTest tests[] = {
    278     { "CWSdd\x00\xB3", sizeof("CWSdd\x00\xB3")-1,
    279       "http://www.example.com/foo",
    280       "", "application/octet-stream" },
    281     { "FLVjdkl*(#)0sdj\x00", sizeof("FLVjdkl*(#)0sdj\x00")-1,
    282       "http://www.example.com/foo?q=ttt.swf",
    283       "", "application/octet-stream" },
    284     { "FWS3$9\r\b\x00", sizeof("FWS3$9\r\b\x00")-1,
    285       "http://www.example.com/foo#ttt.swf",
    286       "", "application/octet-stream" },
    287     { "FLVjdkl*(#)0sdj", sizeof("FLVjdkl*(#)0sdj")-1,
    288       "http://www.example.com/foo.swf",
    289       "", "text/plain" },
    290     { "FLVjdkl*(#)0s\x01dj", sizeof("FLVjdkl*(#)0s\x01dj")-1,
    291       "http://www.example.com/foo/bar.swf",
    292       "", "application/octet-stream" },
    293     { "FWS3$9\r\b\x1A", sizeof("FWS3$9\r\b\x1A")-1,
    294       "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
    295       "", "application/octet-stream" },
    296     { "FWS3$9\r\x1C\b", sizeof("FWS3$9\r\x1C\b")-1,
    297       "http://www.example.com/foo.swf?clickTAG=http://www.adnetwork.com/bar",
    298       "text/plain", "application/octet-stream" },
    299   };
    300 
    301   TestArray(tests, arraysize(tests));
    302 }
    303 
    304 TEST(MimeSnifferTest, XMLTest) {
    305   // An easy feed to identify.
    306   EXPECT_EQ("application/atom+xml",
    307             SniffMimeType("<?xml?><feed", "", "text/xml"));
    308   // Don't sniff out of plain text.
    309   EXPECT_EQ("text/plain",
    310             SniffMimeType("<?xml?><feed", "", "text/plain"));
    311   // Simple RSS.
    312   EXPECT_EQ("application/rss+xml",
    313             SniffMimeType("<?xml version='1.0'?>\r\n<rss", "", "text/xml"));
    314 
    315   // The top of CNN's RSS feed, which we'd like to recognize as RSS.
    316   static const char kCNNRSS[] =
    317       "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    318       "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/rss2full.xsl\" "
    319       "type=\"text/xsl\" media=\"screen\"?>"
    320       "<?xml-stylesheet href=\"http://rss.cnn.com/~d/styles/itemcontent.css\" "
    321       "type=\"text/css\" media=\"screen\"?>"
    322       "<rss xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" "
    323       "version=\"2.0\">";
    324   // CNN's RSS
    325   EXPECT_EQ("application/rss+xml",
    326             SniffMimeType(kCNNRSS, "", "text/xml"));
    327   EXPECT_EQ("text/plain",
    328             SniffMimeType(kCNNRSS, "", "text/plain"));
    329 
    330   // Don't sniff random XML as something different.
    331   EXPECT_EQ("text/xml",
    332             SniffMimeType("<?xml?><notafeed", "", "text/xml"));
    333   // Don't sniff random plain-text as something different.
    334   EXPECT_EQ("text/plain",
    335             SniffMimeType("<?xml?><notafeed", "", "text/plain"));
    336 
    337   // Positive test for the two instances we upgrade to XHTML.
    338   EXPECT_EQ("application/xhtml+xml",
    339             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
    340                           "", "text/xml"));
    341   EXPECT_EQ("application/xhtml+xml",
    342             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
    343                           "", "application/xml"));
    344 
    345   // Following our behavior with HTML, don't call other mime types XHTML.
    346   EXPECT_EQ("text/plain",
    347             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
    348                           "", "text/plain"));
    349   EXPECT_EQ("application/rss+xml",
    350             SniffMimeType("<html xmlns=\"http://www.w3.org/1999/xhtml\">",
    351                           "", "application/rss+xml"));
    352 
    353   // Don't sniff other HTML-looking bits as HTML.
    354   EXPECT_EQ("text/xml",
    355             SniffMimeType("<html><head>", "", "text/xml"));
    356   EXPECT_EQ("text/xml",
    357             SniffMimeType("<foo><html xmlns=\"http://www.w3.org/1999/xhtml\">",
    358                           "", "text/xml"));
    359 
    360 }
    361 
    362 // Test content which is >= 1024 bytes, and includes no open angle bracket.
    363 // http://code.google.com/p/chromium/issues/detail?id=3521
    364 TEST(MimeSnifferTest, XMLTestLargeNoAngledBracket) {
    365   // Make a large input, with 1024 bytes of "x".
    366   std::string content;
    367   content.resize(1024);
    368   std::fill(content.begin(), content.end(), 'x');
    369 
    370   // content.size() >= 1024 so the sniff is unambiguous.
    371   std::string mime_type;
    372   EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
    373                             "text/xml", &mime_type));
    374   EXPECT_EQ("text/xml", mime_type);
    375 }
    376 
    377 // Test content which is >= 1024 bytes, and includes a binary looking byte.
    378 // http://code.google.com/p/chromium/issues/detail?id=15314
    379 TEST(MimeSnifferTest, LooksBinary) {
    380   // Make a large input, with 1024 bytes of "x" and 1 byte of 0x01.
    381   std::string content;
    382   content.resize(1024);
    383   std::fill(content.begin(), content.end(), 'x');
    384   content[1000] = 0x01;
    385 
    386   // content.size() >= 1024 so the sniff is unambiguous.
    387   std::string mime_type;
    388   EXPECT_TRUE(SniffMimeType(content.data(), content.size(), GURL(),
    389                             "text/plain", &mime_type));
    390   EXPECT_EQ("application/octet-stream", mime_type);
    391 }
    392 
    393 }  // namespace net
    394