Home | History | Annotate | Download | only in safe_browsing
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <algorithm>
      6 
      7 #include "base/string_util.h"
      8 #include "crypto/sha2.h"
      9 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
     10 #include "googleurl/src/gurl.h"
     11 #include "testing/gtest/include/gtest/gtest.h"
     12 
     13 namespace {
     14 
     15 bool VectorContains(const std::vector<std::string>& data,
     16                     const std::string& str) {
     17   return std::find(data.begin(), data.end(), str) != data.end();
     18 }
     19 
     20 }
     21 
     22 // Tests that we generate the required host/path combinations for testing
     23 // according to the Safe Browsing spec.
     24 // See section 6.2 in
     25 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
     26 TEST(SafeBrowsingUtilTest, UrlParsing) {
     27   std::vector<std::string> hosts, paths;
     28 
     29   GURL url("http://a.b.c/1/2.html?param=1");
     30   safe_browsing_util::GenerateHostsToCheck(url, &hosts);
     31   safe_browsing_util::GeneratePathsToCheck(url, &paths);
     32   EXPECT_EQ(hosts.size(), static_cast<size_t>(2));
     33   EXPECT_EQ(paths.size(), static_cast<size_t>(4));
     34   EXPECT_EQ(hosts[0], "b.c");
     35   EXPECT_EQ(hosts[1], "a.b.c");
     36 
     37   EXPECT_TRUE(VectorContains(paths, "/1/2.html?param=1"));
     38   EXPECT_TRUE(VectorContains(paths, "/1/2.html"));
     39   EXPECT_TRUE(VectorContains(paths, "/1/"));
     40   EXPECT_TRUE(VectorContains(paths, "/"));
     41 
     42   url = GURL("http://a.b.c.d.e.f.g/1.html");
     43   safe_browsing_util::GenerateHostsToCheck(url, &hosts);
     44   safe_browsing_util::GeneratePathsToCheck(url, &paths);
     45   EXPECT_EQ(hosts.size(), static_cast<size_t>(5));
     46   EXPECT_EQ(paths.size(), static_cast<size_t>(2));
     47   EXPECT_EQ(hosts[0], "f.g");
     48   EXPECT_EQ(hosts[1], "e.f.g");
     49   EXPECT_EQ(hosts[2], "d.e.f.g");
     50   EXPECT_EQ(hosts[3], "c.d.e.f.g");
     51   EXPECT_EQ(hosts[4], "a.b.c.d.e.f.g");
     52   EXPECT_TRUE(VectorContains(paths, "/1.html"));
     53   EXPECT_TRUE(VectorContains(paths, "/"));
     54 
     55   url = GURL("http://a.b/saw-cgi/eBayISAPI.dll/");
     56   safe_browsing_util::GeneratePathsToCheck(url, &paths);
     57   EXPECT_EQ(paths.size(), static_cast<size_t>(3));
     58   EXPECT_TRUE(VectorContains(paths, "/saw-cgi/eBayISAPI.dll/"));
     59   EXPECT_TRUE(VectorContains(paths, "/saw-cgi/"));
     60   EXPECT_TRUE(VectorContains(paths, "/"));
     61 }
     62 
     63 // Tests the url canonicalization according to the Safe Browsing spec.
     64 // See section 6.1 in
     65 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
     66 TEST(SafeBrowsingUtilTest, CanonicalizeUrl) {
     67   struct {
     68     const char* input_url;
     69     const char* expected_canonicalized_hostname;
     70     const char* expected_canonicalized_path;
     71     const char* expected_canonicalized_query;
     72   } tests[] = {
     73     {
     74       "http://host/%25%32%35",
     75       "host",
     76       "/%25",
     77       ""
     78     }, {
     79       "http://host/%25%32%35%25%32%35",
     80       "host",
     81       "/%25%25",
     82       ""
     83     }, {
     84       "http://host/%2525252525252525",
     85       "host",
     86       "/%25",
     87       ""
     88     }, {
     89       "http://host/asdf%25%32%35asd",
     90       "host",
     91       "/asdf%25asd",
     92       ""
     93     }, {
     94       "http://host/%%%25%32%35asd%%",
     95       "host",
     96       "/%25%25%25asd%25%25",
     97       ""
     98     }, {
     99       "http://host/%%%25%32%35asd%%",
    100       "host",
    101       "/%25%25%25asd%25%25",
    102       ""
    103     }, {
    104       "http://www.google.com/",
    105       "www.google.com",
    106       "/",
    107       ""
    108     }, {
    109       "http://%31%36%38%2e%31%38%38%2e%39%39%2e%32%36/%2E%73%65%63%75%72%65/%77"
    110           "%77%77%2E%65%62%61%79%2E%63%6F%6D/",
    111       "168.188.99.26",
    112       "/.secure/www.ebay.com/",
    113       ""
    114     }, {
    115       "http://195.127.0.11/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserd"
    116           "ataxplimnbqmn-xplmvalidateinfoswqpcmlx=hgplmcx/",
    117       "195.127.0.11",
    118       "/uploads/%20%20%20%20/.verify/.eBaysecure=updateuserdataxplimnbqmn-xplmv"
    119           "alidateinfoswqpcmlx=hgplmcx/",
    120       ""
    121     }, {
    122       "http://host.com/%257Ea%2521b%2540c%2523d%2524e%25f%255E00%252611%252A"
    123           "22%252833%252944_55%252B",
    124       "host.com",
    125       "/~a!b@c%23d$e%25f^00&11*22(33)44_55+",
    126       ""
    127     }, {
    128       "http://3279880203/blah",
    129       "195.127.0.11",
    130       "/blah",
    131       ""
    132     }, {
    133       "http://www.google.com/blah/..",
    134       "www.google.com",
    135       "/",
    136       ""
    137     }, {
    138       "http://www.google.com/blah#fraq",
    139       "www.google.com",
    140       "/blah",
    141       ""
    142     }, {
    143       "http://www.GOOgle.com/",
    144       "www.google.com",
    145       "/",
    146       ""
    147     }, {
    148       "http://www.google.com.../",
    149       "www.google.com",
    150       "/",
    151       ""
    152     }, {
    153       "http://www.google.com/q?",
    154       "www.google.com",
    155       "/q",
    156       ""
    157     }, {
    158       "http://www.google.com/q?r?",
    159       "www.google.com",
    160       "/q",
    161       "r?"
    162     }, {
    163       "http://www.google.com/q?r?s",
    164       "www.google.com",
    165       "/q",
    166       "r?s"
    167     }, {
    168       "http://evil.com/foo#bar#baz",
    169       "evil.com",
    170       "/foo",
    171       ""
    172     }, {
    173       "http://evil.com/foo;",
    174       "evil.com",
    175       "/foo;",
    176       ""
    177     }, {
    178       "http://evil.com/foo?bar;",
    179       "evil.com",
    180       "/foo",
    181       "bar;"
    182     }, {
    183       "http://notrailingslash.com",
    184       "notrailingslash.com",
    185       "/",
    186       ""
    187     }, {
    188       "http://www.gotaport.com:1234/",
    189       "www.gotaport.com",
    190       "/",
    191       ""
    192     }, {
    193       "  http://www.google.com/  ",
    194       "www.google.com",
    195       "/",
    196       ""
    197     }, {
    198       "http:// leadingspace.com/",
    199       "%20leadingspace.com",
    200       "/",
    201       ""
    202     }, {
    203       "http://%20leadingspace.com/",
    204       "%20leadingspace.com",
    205       "/",
    206       ""
    207     }, {
    208       "https://www.securesite.com/",
    209       "www.securesite.com",
    210       "/",
    211       ""
    212     }, {
    213       "http://host.com/ab%23cd",
    214       "host.com",
    215       "/ab%23cd",
    216       ""
    217     }, {
    218       "http://host%3e.com//twoslashes?more//slashes",
    219       "host>.com",
    220       "/twoslashes",
    221       "more//slashes"
    222     }, {
    223       "http://host.com/abc?val=xyz#anything",
    224       "host.com",
    225       "/abc",
    226       "val=xyz"
    227     }, {
    228       "http://abc:def@host.com/xyz",
    229       "host.com",
    230       "/xyz",
    231       ""
    232     }, {
    233       "http://host%3e.com/abc/%2e%2e%2fdef",
    234       "host>.com",
    235       "/def",
    236       ""
    237     }, {
    238       "http://.......host...com.....//abc/////def%2F%2F%2Fxyz",
    239       "host.com",
    240       "/abc/def/xyz",
    241       ""
    242     }, {
    243       "ftp://host.com/foo?bar",
    244       "host.com",
    245       "/foo",
    246       "bar"
    247     }, {
    248       "data:text/html;charset=utf-8,%0D%0A",
    249       "",
    250       "",
    251       ""
    252     }, {
    253       "javascript:alert()",
    254       "",
    255       "",
    256       ""
    257     }, {
    258       "mailto:abc (at) example.com",
    259       "",
    260       "",
    261       ""
    262     },
    263   };
    264   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    265     SCOPED_TRACE(StringPrintf("Test: %s", tests[i].input_url));
    266     GURL url(tests[i].input_url);
    267 
    268     std::string canonicalized_hostname;
    269     std::string canonicalized_path;
    270     std::string canonicalized_query;
    271     safe_browsing_util::CanonicalizeUrl(url, &canonicalized_hostname,
    272         &canonicalized_path, &canonicalized_query);
    273 
    274     EXPECT_EQ(tests[i].expected_canonicalized_hostname,
    275               canonicalized_hostname);
    276     EXPECT_EQ(tests[i].expected_canonicalized_path,
    277               canonicalized_path);
    278     EXPECT_EQ(tests[i].expected_canonicalized_query,
    279               canonicalized_query);
    280   }
    281 }
    282 
    283 TEST(SafeBrowsingUtilTest, GetUrlHashIndex) {
    284   GURL url("http://www.evil.com/phish.html");
    285   SBFullHashResult full_hash;
    286   crypto::SHA256HashString(url.host() + url.path(),
    287                          &full_hash.hash,
    288                          sizeof(SBFullHash));
    289   std::vector<SBFullHashResult> full_hashes;
    290   full_hashes.push_back(full_hash);
    291 
    292   EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url, full_hashes), 0);
    293 
    294   url = GURL("http://www.evil.com/okay_path.html");
    295   EXPECT_EQ(safe_browsing_util::GetUrlHashIndex(url, full_hashes), -1);
    296 }
    297 
    298 TEST(SafeBrowsingUtilTest, ListIdListNameConversion) {
    299   std::string list_name;
    300   EXPECT_FALSE(safe_browsing_util::GetListName(safe_browsing_util::INVALID,
    301                                                &list_name));
    302   EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::MALWARE,
    303                                               &list_name));
    304   EXPECT_EQ(list_name, std::string(safe_browsing_util::kMalwareList));
    305   EXPECT_EQ(safe_browsing_util::MALWARE,
    306             safe_browsing_util::GetListId(list_name));
    307 
    308   EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::PHISH,
    309                                               &list_name));
    310   EXPECT_EQ(list_name, std::string(safe_browsing_util::kPhishingList));
    311   EXPECT_EQ(safe_browsing_util::PHISH,
    312             safe_browsing_util::GetListId(list_name));
    313 
    314   EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINURL,
    315                                               &list_name));
    316   EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinUrlList));
    317   EXPECT_EQ(safe_browsing_util::BINURL,
    318             safe_browsing_util::GetListId(list_name));
    319 
    320 
    321   EXPECT_TRUE(safe_browsing_util::GetListName(safe_browsing_util::BINHASH,
    322                                               &list_name));
    323   EXPECT_EQ(list_name, std::string(safe_browsing_util::kBinHashList));
    324   EXPECT_EQ(safe_browsing_util::BINHASH,
    325             safe_browsing_util::GetListId(list_name));
    326 }
    327 
    328 // Since the ids are saved in file, we need to make sure they don't change.
    329 // Since only the last bit of each id is saved in file together with
    330 // chunkids, this checks only last bit.
    331 TEST(SafeBrowsingUtilTest, ListIdVerification) {
    332   EXPECT_EQ(0, safe_browsing_util::MALWARE % 2);
    333   EXPECT_EQ(1, safe_browsing_util::PHISH % 2);
    334   EXPECT_EQ(0, safe_browsing_util::BINURL %2);
    335   EXPECT_EQ(1, safe_browsing_util::BINHASH % 2);
    336 }
    337 
    338 TEST(SafeBrowsingUtilTest, StringToSBFullHashAndSBFullHashToString) {
    339   // 31 chars plus the last \0 as full_hash.
    340   const std::string hash_in = "12345678902234567890323456789012";
    341   SBFullHash hash_out;
    342   safe_browsing_util::StringToSBFullHash(hash_in, &hash_out);
    343   EXPECT_EQ(0x34333231, hash_out.prefix);
    344   EXPECT_EQ(0, memcmp(hash_in.data(), hash_out.full_hash, sizeof(SBFullHash)));
    345 
    346   std::string hash_final = safe_browsing_util::SBFullHashToString(hash_out);
    347   EXPECT_EQ(hash_in, hash_final);
    348 }
    349