1 // Copyright 2007 Google Inc. All Rights Reserved. 2 // Author: brettw (at) google.com (Brett Wilson) 3 4 #include "googleurl/src/gurl.h" 5 #include "googleurl/src/url_canon.h" 6 #include "googleurl/src/url_test_utils.h" 7 #include "testing/gtest/include/gtest/gtest.h" 8 9 // Some implementations of base/basictypes.h may define ARRAYSIZE. 10 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro 11 // which is in our version of basictypes.h. 12 #ifndef ARRAYSIZE 13 #define ARRAYSIZE ARRAYSIZE_UNSAFE 14 #endif 15 16 using url_test_utils::WStringToUTF16; 17 using url_test_utils::ConvertUTF8ToUTF16; 18 19 namespace { 20 21 template<typename CHAR> 22 void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*, 23 const url_parse::Component&), 24 url_canon::Replacements<CHAR>* replacements, 25 const CHAR* str) { 26 if (str) { 27 url_parse::Component comp; 28 if (str[0]) 29 comp.len = static_cast<int>(strlen(str)); 30 (replacements->*func)(str, comp); 31 } 32 } 33 34 // Returns the canonicalized string for the given URL string for the 35 // GURLTest.Types test. 36 std::string TypesTestCase(const char* src) { 37 GURL gurl(src); 38 return gurl.possibly_invalid_spec(); 39 } 40 41 } // namespace 42 43 // Different types of URLs should be handled differently by url_util, and 44 // handed off to different canonicalizers. 45 TEST(GURLTest, Types) { 46 // URLs with unknown schemes should be treated as path URLs, even when they 47 // have things like "://". 48 EXPECT_EQ("something:///HOSTNAME.com/", 49 TypesTestCase("something:///HOSTNAME.com/")); 50 51 // In the reverse, known schemes should always trigger standard URL handling. 52 EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com")); 53 EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com")); 54 EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com")); 55 EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com")); 56 57 #ifdef WIN32 58 // URLs that look like absolute Windows drive specs. 59 EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt")); 60 EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt")); 61 EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt")); 62 EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt")); 63 #endif 64 } 65 66 // Test the basic creation and querying of components in a GURL. We assume 67 // the parser is already tested and works, so we are mostly interested if the 68 // object does the right thing with the results. 69 TEST(GURLTest, Components) { 70 GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); 71 EXPECT_TRUE(url.is_valid()); 72 EXPECT_TRUE(url.SchemeIs("http")); 73 EXPECT_FALSE(url.SchemeIsFile()); 74 75 // This is the narrow version of the URL, which should match the wide input. 76 EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec()); 77 78 EXPECT_EQ("http", url.scheme()); 79 EXPECT_EQ("user", url.username()); 80 EXPECT_EQ("pass", url.password()); 81 EXPECT_EQ("google.com", url.host()); 82 EXPECT_EQ("99", url.port()); 83 EXPECT_EQ(99, url.IntPort()); 84 EXPECT_EQ("/foo;bar", url.path()); 85 EXPECT_EQ("q=a", url.query()); 86 EXPECT_EQ("ref", url.ref()); 87 } 88 89 TEST(GURLTest, Empty) { 90 GURL url; 91 EXPECT_FALSE(url.is_valid()); 92 EXPECT_EQ("", url.spec()); 93 94 EXPECT_EQ("", url.scheme()); 95 EXPECT_EQ("", url.username()); 96 EXPECT_EQ("", url.password()); 97 EXPECT_EQ("", url.host()); 98 EXPECT_EQ("", url.port()); 99 EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort()); 100 EXPECT_EQ("", url.path()); 101 EXPECT_EQ("", url.query()); 102 EXPECT_EQ("", url.ref()); 103 } 104 105 TEST(GURLTest, Copy) { 106 GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); 107 108 GURL url2(url); 109 EXPECT_TRUE(url2.is_valid()); 110 111 EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec()); 112 EXPECT_EQ("http", url2.scheme()); 113 EXPECT_EQ("user", url2.username()); 114 EXPECT_EQ("pass", url2.password()); 115 EXPECT_EQ("google.com", url2.host()); 116 EXPECT_EQ("99", url2.port()); 117 EXPECT_EQ(99, url2.IntPort()); 118 EXPECT_EQ("/foo;bar", url2.path()); 119 EXPECT_EQ("q=a", url2.query()); 120 EXPECT_EQ("ref", url2.ref()); 121 122 // Copying of invalid URL should be invalid 123 GURL invalid; 124 GURL invalid2(invalid); 125 EXPECT_FALSE(invalid2.is_valid()); 126 EXPECT_EQ("", invalid2.spec()); 127 EXPECT_EQ("", invalid2.scheme()); 128 EXPECT_EQ("", invalid2.username()); 129 EXPECT_EQ("", invalid2.password()); 130 EXPECT_EQ("", invalid2.host()); 131 EXPECT_EQ("", invalid2.port()); 132 EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort()); 133 EXPECT_EQ("", invalid2.path()); 134 EXPECT_EQ("", invalid2.query()); 135 EXPECT_EQ("", invalid2.ref()); 136 } 137 138 // Given an invalid URL, we should still get most of the components. 139 TEST(GURLTest, Invalid) { 140 GURL url("http:google.com:foo"); 141 EXPECT_FALSE(url.is_valid()); 142 EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); 143 144 EXPECT_EQ("http", url.scheme()); 145 EXPECT_EQ("", url.username()); 146 EXPECT_EQ("", url.password()); 147 EXPECT_EQ("google.com", url.host()); 148 EXPECT_EQ("foo", url.port()); 149 EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort()); 150 EXPECT_EQ("/", url.path()); 151 EXPECT_EQ("", url.query()); 152 EXPECT_EQ("", url.ref()); 153 } 154 155 TEST(GURLTest, Resolve) { 156 // The tricky cases for relative URL resolving are tested in the 157 // canonicalizer unit test. Here, we just test that the GURL integration 158 // works properly. 159 struct ResolveCase { 160 const char* base; 161 const char* relative; 162 bool expected_valid; 163 const char* expected; 164 } resolve_cases[] = { 165 {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"}, 166 {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, 167 {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, 168 {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, 169 {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, 170 // Unknown schemes are not standard. 171 {"data:blahblah", "http://google.com/", true, "http://google.com/"}, 172 {"data:blahblah", "http:google.com", true, "http://google.com/"}, 173 {"data:/blahblah", "file.html", false, ""}, 174 }; 175 176 for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) { 177 // 8-bit code path. 178 GURL input(resolve_cases[i].base); 179 GURL output = input.Resolve(resolve_cases[i].relative); 180 EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i; 181 EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i; 182 183 // Wide code path. 184 GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base)); 185 GURL outputw = 186 input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative)); 187 EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i; 188 EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i; 189 } 190 } 191 192 TEST(GURLTest, GetOrigin) { 193 struct TestCase { 194 const char* input; 195 const char* expected; 196 } cases[] = { 197 {"http://www.google.com", "http://www.google.com/"}, 198 {"javascript:window.alert(\"hello,world\");", ""}, 199 {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"}, 200 {"http://user@www.google.com", "http://www.google.com/"}, 201 {"http://:pass@www.google.com", "http://www.google.com/"}, 202 {"http://:@www.google.com", "http://www.google.com/"}, 203 }; 204 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 205 GURL url(cases[i].input); 206 GURL origin = url.GetOrigin(); 207 EXPECT_EQ(cases[i].expected, origin.spec()); 208 } 209 } 210 211 TEST(GURLTest, GetWithEmptyPath) { 212 struct TestCase { 213 const char* input; 214 const char* expected; 215 } cases[] = { 216 {"http://www.google.com", "http://www.google.com/"}, 217 {"javascript:window.alert(\"hello, world\");", ""}, 218 {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"}, 219 }; 220 221 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 222 GURL url(cases[i].input); 223 GURL empty_path = url.GetWithEmptyPath(); 224 EXPECT_EQ(cases[i].expected, empty_path.spec()); 225 } 226 } 227 228 TEST(GURLTest, Replacements) { 229 // The url canonicalizer replacement test will handle most of these case. 230 // The most important thing to do here is to check that the proper 231 // canonicalizer gets called based on the scheme of the input. 232 struct ReplaceCase { 233 const char* base; 234 const char* scheme; 235 const char* username; 236 const char* password; 237 const char* host; 238 const char* port; 239 const char* path; 240 const char* query; 241 const char* ref; 242 const char* expected; 243 } replace_cases[] = { 244 {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"}, 245 {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"}, 246 {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"}, 247 #ifdef WIN32 248 {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"}, 249 #endif 250 }; 251 252 for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) { 253 const ReplaceCase& cur = replace_cases[i]; 254 GURL url(cur.base); 255 GURL::Replacements repl; 256 SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme); 257 SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username); 258 SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password); 259 SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host); 260 SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port); 261 SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path); 262 SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query); 263 SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref); 264 GURL output = url.ReplaceComponents(repl); 265 266 EXPECT_EQ(replace_cases[i].expected, output.spec()); 267 } 268 } 269 270 TEST(GURLTest, PathForRequest) { 271 struct TestCase { 272 const char* input; 273 const char* expected; 274 } cases[] = { 275 {"http://www.google.com", "/"}, 276 {"http://www.google.com/", "/"}, 277 {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22"}, 278 {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html"}, 279 {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query"}, 280 }; 281 282 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 283 GURL url(cases[i].input); 284 std::string path_request = url.PathForRequest(); 285 EXPECT_EQ(cases[i].expected, path_request); 286 } 287 } 288 289 TEST(GURLTest, EffectiveIntPort) { 290 struct PortTest { 291 const char* spec; 292 int expected_int_port; 293 } port_tests[] = { 294 // http 295 {"http://www.google.com/", 80}, 296 {"http://www.google.com:80/", 80}, 297 {"http://www.google.com:443/", 443}, 298 299 // https 300 {"https://www.google.com/", 443}, 301 {"https://www.google.com:443/", 443}, 302 {"https://www.google.com:80/", 80}, 303 304 // ftp 305 {"ftp://www.google.com/", 21}, 306 {"ftp://www.google.com:21/", 21}, 307 {"ftp://www.google.com:80/", 80}, 308 309 // gopher 310 {"gopher://www.google.com/", 70}, 311 {"gopher://www.google.com:70/", 70}, 312 {"gopher://www.google.com:80/", 80}, 313 314 // file - no port 315 {"file://www.google.com/", url_parse::PORT_UNSPECIFIED}, 316 {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED}, 317 318 // data - no port 319 {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED}, 320 {"data:www.google.com", url_parse::PORT_UNSPECIFIED}, 321 }; 322 323 for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) { 324 GURL url(port_tests[i].spec); 325 EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort()); 326 } 327 } 328 329 TEST(GURLTest, IPAddress) { 330 struct IPTest { 331 const char* spec; 332 bool expected_ip; 333 } ip_tests[] = { 334 {"http://www.google.com/", false}, 335 {"http://192.168.9.1/", true}, 336 {"http://192.168.9.1.2/", false}, 337 {"http://192.168.m.1/", false}, 338 {"http://2001:db8::1/", false}, 339 {"http://[2001:db8::1]/", true}, 340 {"", false}, 341 {"some random input!", false}, 342 }; 343 344 for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) { 345 GURL url(ip_tests[i].spec); 346 EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress()); 347 } 348 } 349 350 TEST(GURLTest, HostNoBrackets) { 351 struct TestCase { 352 const char* input; 353 const char* expected_host; 354 const char* expected_plainhost; 355 } cases[] = { 356 {"http://www.google.com", "www.google.com", "www.google.com"}, 357 {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"}, 358 {"http://[::]/", "[::]", "::"}, 359 360 // Don't require a valid URL, but don't crash either. 361 {"http://[]/", "[]", ""}, 362 {"http://[x]/", "[x]", "x"}, 363 {"http://[x/", "[x", "[x"}, 364 {"http://x]/", "x]", "x]"}, 365 {"http://[/", "[", "["}, 366 {"http://]/", "]", "]"}, 367 {"", "", ""}, 368 }; 369 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 370 GURL url(cases[i].input); 371 EXPECT_EQ(cases[i].expected_host, url.host()); 372 EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets()); 373 } 374 } 375 376 TEST(GURLTest, DomainIs) { 377 const char google_domain[] = "google.com"; 378 379 GURL url_1("http://www.google.com:99/foo"); 380 EXPECT_TRUE(url_1.DomainIs(google_domain)); 381 382 GURL url_2("http://google.com:99/foo"); 383 EXPECT_TRUE(url_2.DomainIs(google_domain)); 384 385 GURL url_3("http://google.com./foo"); 386 EXPECT_TRUE(url_3.DomainIs(google_domain)); 387 388 GURL url_4("http://google.com/foo"); 389 EXPECT_FALSE(url_4.DomainIs("google.com.")); 390 391 GURL url_5("http://google.com./foo"); 392 EXPECT_TRUE(url_5.DomainIs("google.com.")); 393 394 GURL url_6("http://www.google.com./foo"); 395 EXPECT_TRUE(url_6.DomainIs(".com.")); 396 397 GURL url_7("http://www.balabala.com/foo"); 398 EXPECT_FALSE(url_7.DomainIs(google_domain)); 399 400 GURL url_8("http://www.google.com.cn/foo"); 401 EXPECT_FALSE(url_8.DomainIs(google_domain)); 402 403 GURL url_9("http://www.iamnotgoogle.com/foo"); 404 EXPECT_FALSE(url_9.DomainIs(google_domain)); 405 406 GURL url_10("http://www.iamnotgoogle.com../foo"); 407 EXPECT_FALSE(url_10.DomainIs(".com")); 408 } 409 410 // Newlines should be stripped from inputs. 411 TEST(GURLTest, Newlines) { 412 // Constructor. 413 GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n "); 414 EXPECT_EQ("http://www.google.com/asdf", url_1.spec()); 415 416 // Relative path resolver. 417 GURL url_2 = url_1.Resolve(" \n /fo\to\r "); 418 EXPECT_EQ("http://www.google.com/foo", url_2.spec()); 419 420 // Note that newlines are NOT stripped from ReplaceComponents. 421 } 422 423 TEST(GURLTest, IsStandard) { 424 GURL a("http:foo/bar"); 425 EXPECT_TRUE(a.IsStandard()); 426 427 GURL b("foo:bar/baz"); 428 EXPECT_FALSE(b.IsStandard()); 429 430 GURL c("foo://bar/baz"); 431 EXPECT_FALSE(c.IsStandard()); 432 } 433