Home | History | Annotate | Download | only in src
      1 // Copyright 2007 Google Inc. All Rights Reserved.
      2 // Author: brettw (at) google.com (Brett Wilson)
      3 
      4 #include "googleurl/src/gurl.h"
      5 #include "googleurl/src/url_canon.h"
      6 #include "googleurl/src/url_test_utils.h"
      7 #include "testing/gtest/include/gtest/gtest.h"
      8 
      9 // Some implementations of base/basictypes.h may define ARRAYSIZE.
     10 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro
     11 // which is in our version of basictypes.h.
     12 #ifndef ARRAYSIZE
     13 #define ARRAYSIZE ARRAYSIZE_UNSAFE
     14 #endif
     15 
     16 using url_test_utils::WStringToUTF16;
     17 using url_test_utils::ConvertUTF8ToUTF16;
     18 
     19 namespace {
     20 
     21 template<typename CHAR>
     22 void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*,
     23                           const url_parse::Component&),
     24                       url_canon::Replacements<CHAR>* replacements,
     25                       const CHAR* str) {
     26   if (str) {
     27     url_parse::Component comp;
     28     if (str[0])
     29       comp.len = static_cast<int>(strlen(str));
     30     (replacements->*func)(str, comp);
     31   }
     32 }
     33 
     34 // Returns the canonicalized string for the given URL string for the
     35 // GURLTest.Types test.
     36 std::string TypesTestCase(const char* src) {
     37   GURL gurl(src);
     38   return gurl.possibly_invalid_spec();
     39 }
     40 
     41 }  // namespace
     42 
     43 // Different types of URLs should be handled differently by url_util, and
     44 // handed off to different canonicalizers.
     45 TEST(GURLTest, Types) {
     46   // URLs with unknown schemes should be treated as path URLs, even when they
     47   // have things like "://".
     48   EXPECT_EQ("something:///HOSTNAME.com/",
     49             TypesTestCase("something:///HOSTNAME.com/"));
     50 
     51   // In the reverse, known schemes should always trigger standard URL handling.
     52   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com"));
     53   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com"));
     54   EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com"));
     55   EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com"));
     56 
     57 #ifdef WIN32
     58   // URLs that look like absolute Windows drive specs.
     59   EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt"));
     60   EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt"));
     61   EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt"));
     62   EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt"));
     63 #endif
     64 }
     65 
     66 // Test the basic creation and querying of components in a GURL. We assume
     67 // the parser is already tested and works, so we are mostly interested if the
     68 // object does the right thing with the results.
     69 TEST(GURLTest, Components) {
     70   GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
     71   EXPECT_TRUE(url.is_valid());
     72   EXPECT_TRUE(url.SchemeIs("http"));
     73   EXPECT_FALSE(url.SchemeIsFile());
     74 
     75   // This is the narrow version of the URL, which should match the wide input.
     76   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec());
     77 
     78   EXPECT_EQ("http", url.scheme());
     79   EXPECT_EQ("user", url.username());
     80   EXPECT_EQ("pass", url.password());
     81   EXPECT_EQ("google.com", url.host());
     82   EXPECT_EQ("99", url.port());
     83   EXPECT_EQ(99, url.IntPort());
     84   EXPECT_EQ("/foo;bar", url.path());
     85   EXPECT_EQ("q=a", url.query());
     86   EXPECT_EQ("ref", url.ref());
     87 }
     88 
     89 TEST(GURLTest, Empty) {
     90   GURL url;
     91   EXPECT_FALSE(url.is_valid());
     92   EXPECT_EQ("", url.spec());
     93 
     94   EXPECT_EQ("", url.scheme());
     95   EXPECT_EQ("", url.username());
     96   EXPECT_EQ("", url.password());
     97   EXPECT_EQ("", url.host());
     98   EXPECT_EQ("", url.port());
     99   EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort());
    100   EXPECT_EQ("", url.path());
    101   EXPECT_EQ("", url.query());
    102   EXPECT_EQ("", url.ref());
    103 }
    104 
    105 TEST(GURLTest, Copy) {
    106   GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref"));
    107 
    108   GURL url2(url);
    109   EXPECT_TRUE(url2.is_valid());
    110 
    111   EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec());
    112   EXPECT_EQ("http", url2.scheme());
    113   EXPECT_EQ("user", url2.username());
    114   EXPECT_EQ("pass", url2.password());
    115   EXPECT_EQ("google.com", url2.host());
    116   EXPECT_EQ("99", url2.port());
    117   EXPECT_EQ(99, url2.IntPort());
    118   EXPECT_EQ("/foo;bar", url2.path());
    119   EXPECT_EQ("q=a", url2.query());
    120   EXPECT_EQ("ref", url2.ref());
    121 
    122   // Copying of invalid URL should be invalid
    123   GURL invalid;
    124   GURL invalid2(invalid);
    125   EXPECT_FALSE(invalid2.is_valid());
    126   EXPECT_EQ("", invalid2.spec());
    127   EXPECT_EQ("", invalid2.scheme());
    128   EXPECT_EQ("", invalid2.username());
    129   EXPECT_EQ("", invalid2.password());
    130   EXPECT_EQ("", invalid2.host());
    131   EXPECT_EQ("", invalid2.port());
    132   EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort());
    133   EXPECT_EQ("", invalid2.path());
    134   EXPECT_EQ("", invalid2.query());
    135   EXPECT_EQ("", invalid2.ref());
    136 }
    137 
    138 // Given an invalid URL, we should still get most of the components.
    139 TEST(GURLTest, Invalid) {
    140   GURL url("http:google.com:foo");
    141   EXPECT_FALSE(url.is_valid());
    142   EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec());
    143 
    144   EXPECT_EQ("http", url.scheme());
    145   EXPECT_EQ("", url.username());
    146   EXPECT_EQ("", url.password());
    147   EXPECT_EQ("google.com", url.host());
    148   EXPECT_EQ("foo", url.port());
    149   EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort());
    150   EXPECT_EQ("/", url.path());
    151   EXPECT_EQ("", url.query());
    152   EXPECT_EQ("", url.ref());
    153 }
    154 
    155 TEST(GURLTest, Resolve) {
    156   // The tricky cases for relative URL resolving are tested in the
    157   // canonicalizer unit test. Here, we just test that the GURL integration
    158   // works properly.
    159   struct ResolveCase {
    160     const char* base;
    161     const char* relative;
    162     bool expected_valid;
    163     const char* expected;
    164   } resolve_cases[] = {
    165     {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"},
    166     {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"},
    167     {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"},
    168     {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"},
    169     {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"},
    170       // Unknown schemes are not standard.
    171     {"data:blahblah", "http://google.com/", true, "http://google.com/"},
    172     {"data:blahblah", "http:google.com", true, "http://google.com/"},
    173     {"data:/blahblah", "file.html", false, ""},
    174   };
    175 
    176   for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) {
    177     // 8-bit code path.
    178     GURL input(resolve_cases[i].base);
    179     GURL output = input.Resolve(resolve_cases[i].relative);
    180     EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i;
    181     EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i;
    182 
    183     // Wide code path.
    184     GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base));
    185     GURL outputw =
    186         input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative));
    187     EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i;
    188     EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i;
    189   }
    190 }
    191 
    192 TEST(GURLTest, GetOrigin) {
    193   struct TestCase {
    194     const char* input;
    195     const char* expected;
    196   } cases[] = {
    197     {"http://www.google.com", "http://www.google.com/"},
    198     {"javascript:window.alert(\"hello,world\");", ""},
    199     {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"},
    200     {"http://user@www.google.com", "http://www.google.com/"},
    201     {"http://:pass@www.google.com", "http://www.google.com/"},
    202     {"http://:@www.google.com", "http://www.google.com/"},
    203   };
    204   for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    205     GURL url(cases[i].input);
    206     GURL origin = url.GetOrigin();
    207     EXPECT_EQ(cases[i].expected, origin.spec());
    208   }
    209 }
    210 
    211 TEST(GURLTest, GetWithEmptyPath) {
    212   struct TestCase {
    213     const char* input;
    214     const char* expected;
    215   } cases[] = {
    216     {"http://www.google.com", "http://www.google.com/"},
    217     {"javascript:window.alert(\"hello, world\");", ""},
    218     {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"},
    219   };
    220 
    221   for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    222     GURL url(cases[i].input);
    223     GURL empty_path = url.GetWithEmptyPath();
    224     EXPECT_EQ(cases[i].expected, empty_path.spec());
    225   }
    226 }
    227 
    228 TEST(GURLTest, Replacements) {
    229   // The url canonicalizer replacement test will handle most of these case.
    230   // The most important thing to do here is to check that the proper
    231   // canonicalizer gets called based on the scheme of the input.
    232   struct ReplaceCase {
    233     const char* base;
    234     const char* scheme;
    235     const char* username;
    236     const char* password;
    237     const char* host;
    238     const char* port;
    239     const char* path;
    240     const char* query;
    241     const char* ref;
    242     const char* expected;
    243   } replace_cases[] = {
    244     {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"},
    245     {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"},
    246     {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"},
    247 #ifdef WIN32
    248     {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"},
    249 #endif
    250   };
    251 
    252   for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) {
    253     const ReplaceCase& cur = replace_cases[i];
    254     GURL url(cur.base);
    255     GURL::Replacements repl;
    256     SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme);
    257     SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username);
    258     SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password);
    259     SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host);
    260     SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port);
    261     SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path);
    262     SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query);
    263     SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref);
    264     GURL output = url.ReplaceComponents(repl);
    265 
    266     EXPECT_EQ(replace_cases[i].expected, output.spec());
    267   }
    268 }
    269 
    270 TEST(GURLTest, PathForRequest) {
    271   struct TestCase {
    272     const char* input;
    273     const char* expected;
    274   } cases[] = {
    275     {"http://www.google.com", "/"},
    276     {"http://www.google.com/", "/"},
    277     {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22"},
    278     {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html"},
    279     {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query"},
    280   };
    281 
    282   for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    283     GURL url(cases[i].input);
    284     std::string path_request = url.PathForRequest();
    285     EXPECT_EQ(cases[i].expected, path_request);
    286   }
    287 }
    288 
    289 TEST(GURLTest, EffectiveIntPort) {
    290   struct PortTest {
    291     const char* spec;
    292     int expected_int_port;
    293   } port_tests[] = {
    294     // http
    295     {"http://www.google.com/", 80},
    296     {"http://www.google.com:80/", 80},
    297     {"http://www.google.com:443/", 443},
    298 
    299     // https
    300     {"https://www.google.com/", 443},
    301     {"https://www.google.com:443/", 443},
    302     {"https://www.google.com:80/", 80},
    303 
    304     // ftp
    305     {"ftp://www.google.com/", 21},
    306     {"ftp://www.google.com:21/", 21},
    307     {"ftp://www.google.com:80/", 80},
    308 
    309     // gopher
    310     {"gopher://www.google.com/", 70},
    311     {"gopher://www.google.com:70/", 70},
    312     {"gopher://www.google.com:80/", 80},
    313 
    314     // file - no port
    315     {"file://www.google.com/", url_parse::PORT_UNSPECIFIED},
    316     {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED},
    317 
    318     // data - no port
    319     {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED},
    320     {"data:www.google.com", url_parse::PORT_UNSPECIFIED},
    321   };
    322 
    323   for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) {
    324     GURL url(port_tests[i].spec);
    325     EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort());
    326   }
    327 }
    328 
    329 TEST(GURLTest, IPAddress) {
    330   struct IPTest {
    331     const char* spec;
    332     bool expected_ip;
    333   } ip_tests[] = {
    334     {"http://www.google.com/", false},
    335     {"http://192.168.9.1/", true},
    336     {"http://192.168.9.1.2/", false},
    337     {"http://192.168.m.1/", false},
    338     {"http://2001:db8::1/", false},
    339     {"http://[2001:db8::1]/", true},
    340     {"", false},
    341     {"some random input!", false},
    342   };
    343 
    344   for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) {
    345     GURL url(ip_tests[i].spec);
    346     EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress());
    347   }
    348 }
    349 
    350 TEST(GURLTest, HostNoBrackets) {
    351   struct TestCase {
    352     const char* input;
    353     const char* expected_host;
    354     const char* expected_plainhost;
    355   } cases[] = {
    356     {"http://www.google.com", "www.google.com", "www.google.com"},
    357     {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"},
    358     {"http://[::]/", "[::]", "::"},
    359 
    360     // Don't require a valid URL, but don't crash either.
    361     {"http://[]/", "[]", ""},
    362     {"http://[x]/", "[x]", "x"},
    363     {"http://[x/", "[x", "[x"},
    364     {"http://x]/", "x]", "x]"},
    365     {"http://[/", "[", "["},
    366     {"http://]/", "]", "]"},
    367     {"", "", ""},
    368   };
    369   for (size_t i = 0; i < ARRAYSIZE(cases); i++) {
    370     GURL url(cases[i].input);
    371     EXPECT_EQ(cases[i].expected_host, url.host());
    372     EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets());
    373   }
    374 }
    375 
    376 TEST(GURLTest, DomainIs) {
    377   const char google_domain[] = "google.com";
    378 
    379   GURL url_1("http://www.google.com:99/foo");
    380   EXPECT_TRUE(url_1.DomainIs(google_domain));
    381 
    382   GURL url_2("http://google.com:99/foo");
    383   EXPECT_TRUE(url_2.DomainIs(google_domain));
    384 
    385   GURL url_3("http://google.com./foo");
    386   EXPECT_TRUE(url_3.DomainIs(google_domain));
    387 
    388   GURL url_4("http://google.com/foo");
    389   EXPECT_FALSE(url_4.DomainIs("google.com."));
    390 
    391   GURL url_5("http://google.com./foo");
    392   EXPECT_TRUE(url_5.DomainIs("google.com."));
    393 
    394   GURL url_6("http://www.google.com./foo");
    395   EXPECT_TRUE(url_6.DomainIs(".com."));
    396 
    397   GURL url_7("http://www.balabala.com/foo");
    398   EXPECT_FALSE(url_7.DomainIs(google_domain));
    399 
    400   GURL url_8("http://www.google.com.cn/foo");
    401   EXPECT_FALSE(url_8.DomainIs(google_domain));
    402 
    403   GURL url_9("http://www.iamnotgoogle.com/foo");
    404   EXPECT_FALSE(url_9.DomainIs(google_domain));
    405 
    406   GURL url_10("http://www.iamnotgoogle.com../foo");
    407   EXPECT_FALSE(url_10.DomainIs(".com"));
    408 }
    409 
    410 // Newlines should be stripped from inputs.
    411 TEST(GURLTest, Newlines) {
    412   // Constructor.
    413   GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n ");
    414   EXPECT_EQ("http://www.google.com/asdf", url_1.spec());
    415 
    416   // Relative path resolver.
    417   GURL url_2 = url_1.Resolve(" \n /fo\to\r ");
    418   EXPECT_EQ("http://www.google.com/foo", url_2.spec());
    419 
    420   // Note that newlines are NOT stripped from ReplaceComponents.
    421 }
    422 
    423 TEST(GURLTest, IsStandard) {
    424   GURL a("http:foo/bar");
    425   EXPECT_TRUE(a.IsStandard());
    426 
    427   GURL b("foo:bar/baz");
    428   EXPECT_FALSE(b.IsStandard());
    429 
    430   GURL c("foo://bar/baz");
    431   EXPECT_FALSE(c.IsStandard());
    432 }
    433