1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "testing/gtest/include/gtest/gtest.h" 6 #include "url/gurl.h" 7 #include "url/url_canon.h" 8 #include "url/url_test_utils.h" 9 10 // Some implementations of base/basictypes.h may define ARRAYSIZE. 11 // If it's not defined, we define it to the ARRAYSIZE_UNSAFE macro 12 // which is in our version of basictypes.h. 13 #ifndef ARRAYSIZE 14 #define ARRAYSIZE ARRAYSIZE_UNSAFE 15 #endif 16 17 using url_test_utils::WStringToUTF16; 18 using url_test_utils::ConvertUTF8ToUTF16; 19 20 namespace { 21 22 template<typename CHAR> 23 void SetupReplacement(void (url_canon::Replacements<CHAR>::*func)(const CHAR*, 24 const url_parse::Component&), 25 url_canon::Replacements<CHAR>* replacements, 26 const CHAR* str) { 27 if (str) { 28 url_parse::Component comp; 29 if (str[0]) 30 comp.len = static_cast<int>(strlen(str)); 31 (replacements->*func)(str, comp); 32 } 33 } 34 35 // Returns the canonicalized string for the given URL string for the 36 // GURLTest.Types test. 37 std::string TypesTestCase(const char* src) { 38 GURL gurl(src); 39 return gurl.possibly_invalid_spec(); 40 } 41 42 } // namespace 43 44 // Different types of URLs should be handled differently by url_util, and 45 // handed off to different canonicalizers. 46 TEST(GURLTest, Types) { 47 // URLs with unknown schemes should be treated as path URLs, even when they 48 // have things like "://". 49 EXPECT_EQ("something:///HOSTNAME.com/", 50 TypesTestCase("something:///HOSTNAME.com/")); 51 52 // In the reverse, known schemes should always trigger standard URL handling. 53 EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com")); 54 EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com")); 55 EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com")); 56 EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com")); 57 58 #ifdef WIN32 59 // URLs that look like absolute Windows drive specs. 60 EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt")); 61 EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt")); 62 EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt")); 63 EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt")); 64 #endif 65 } 66 67 // Test the basic creation and querying of components in a GURL. We assume 68 // the parser is already tested and works, so we are mostly interested if the 69 // object does the right thing with the results. 70 TEST(GURLTest, Components) { 71 GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); 72 EXPECT_TRUE(url.is_valid()); 73 EXPECT_TRUE(url.SchemeIs("http")); 74 EXPECT_FALSE(url.SchemeIsFile()); 75 76 // This is the narrow version of the URL, which should match the wide input. 77 EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec()); 78 79 EXPECT_EQ("http", url.scheme()); 80 EXPECT_EQ("user", url.username()); 81 EXPECT_EQ("pass", url.password()); 82 EXPECT_EQ("google.com", url.host()); 83 EXPECT_EQ("99", url.port()); 84 EXPECT_EQ(99, url.IntPort()); 85 EXPECT_EQ("/foo;bar", url.path()); 86 EXPECT_EQ("q=a", url.query()); 87 EXPECT_EQ("ref", url.ref()); 88 } 89 90 TEST(GURLTest, Empty) { 91 GURL url; 92 EXPECT_FALSE(url.is_valid()); 93 EXPECT_EQ("", url.spec()); 94 95 EXPECT_EQ("", url.scheme()); 96 EXPECT_EQ("", url.username()); 97 EXPECT_EQ("", url.password()); 98 EXPECT_EQ("", url.host()); 99 EXPECT_EQ("", url.port()); 100 EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url.IntPort()); 101 EXPECT_EQ("", url.path()); 102 EXPECT_EQ("", url.query()); 103 EXPECT_EQ("", url.ref()); 104 } 105 106 TEST(GURLTest, Copy) { 107 GURL url(WStringToUTF16(L"http://user:pass@google.com:99/foo;bar?q=a#ref")); 108 109 GURL url2(url); 110 EXPECT_TRUE(url2.is_valid()); 111 112 EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec()); 113 EXPECT_EQ("http", url2.scheme()); 114 EXPECT_EQ("user", url2.username()); 115 EXPECT_EQ("pass", url2.password()); 116 EXPECT_EQ("google.com", url2.host()); 117 EXPECT_EQ("99", url2.port()); 118 EXPECT_EQ(99, url2.IntPort()); 119 EXPECT_EQ("/foo;bar", url2.path()); 120 EXPECT_EQ("q=a", url2.query()); 121 EXPECT_EQ("ref", url2.ref()); 122 123 // Copying of invalid URL should be invalid 124 GURL invalid; 125 GURL invalid2(invalid); 126 EXPECT_FALSE(invalid2.is_valid()); 127 EXPECT_EQ("", invalid2.spec()); 128 EXPECT_EQ("", invalid2.scheme()); 129 EXPECT_EQ("", invalid2.username()); 130 EXPECT_EQ("", invalid2.password()); 131 EXPECT_EQ("", invalid2.host()); 132 EXPECT_EQ("", invalid2.port()); 133 EXPECT_EQ(url_parse::PORT_UNSPECIFIED, invalid2.IntPort()); 134 EXPECT_EQ("", invalid2.path()); 135 EXPECT_EQ("", invalid2.query()); 136 EXPECT_EQ("", invalid2.ref()); 137 } 138 139 TEST(GURLTest, CopyFileSystem) { 140 GURL url(WStringToUTF16(L"filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref")); 141 142 GURL url2(url); 143 EXPECT_TRUE(url2.is_valid()); 144 145 EXPECT_EQ("filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref", url2.spec()); 146 EXPECT_EQ("filesystem", url2.scheme()); 147 EXPECT_EQ("", url2.username()); 148 EXPECT_EQ("", url2.password()); 149 EXPECT_EQ("", url2.host()); 150 EXPECT_EQ("", url2.port()); 151 EXPECT_EQ(url_parse::PORT_UNSPECIFIED, url2.IntPort()); 152 EXPECT_EQ("/foo;bar", url2.path()); 153 EXPECT_EQ("q=a", url2.query()); 154 EXPECT_EQ("ref", url2.ref()); 155 156 const GURL* inner = url2.inner_url(); 157 ASSERT_TRUE(inner); 158 EXPECT_EQ("https", inner->scheme()); 159 EXPECT_EQ("user", inner->username()); 160 EXPECT_EQ("pass", inner->password()); 161 EXPECT_EQ("google.com", inner->host()); 162 EXPECT_EQ("99", inner->port()); 163 EXPECT_EQ(99, inner->IntPort()); 164 EXPECT_EQ("/t", inner->path()); 165 EXPECT_EQ("", inner->query()); 166 EXPECT_EQ("", inner->ref()); 167 } 168 169 // Given an invalid URL, we should still get most of the components. 170 TEST(GURLTest, Invalid) { 171 GURL url("http:google.com:foo"); 172 EXPECT_FALSE(url.is_valid()); 173 EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); 174 175 EXPECT_EQ("http", url.scheme()); 176 EXPECT_EQ("", url.username()); 177 EXPECT_EQ("", url.password()); 178 EXPECT_EQ("google.com", url.host()); 179 EXPECT_EQ("foo", url.port()); 180 EXPECT_EQ(url_parse::PORT_INVALID, url.IntPort()); 181 EXPECT_EQ("/", url.path()); 182 EXPECT_EQ("", url.query()); 183 EXPECT_EQ("", url.ref()); 184 } 185 186 TEST(GURLTest, Resolve) { 187 // The tricky cases for relative URL resolving are tested in the 188 // canonicalizer unit test. Here, we just test that the GURL integration 189 // works properly. 190 struct ResolveCase { 191 const char* base; 192 const char* relative; 193 bool expected_valid; 194 const char* expected; 195 } resolve_cases[] = { 196 {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"}, 197 {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, 198 {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, 199 {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, 200 {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, 201 // A non-standard base can be replaced with a standard absolute URL. 202 {"data:blahblah", "http://google.com/", true, "http://google.com/"}, 203 {"data:blahblah", "http:google.com", true, "http://google.com/"}, 204 // Filesystem URLs have different paths to test. 205 {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, 206 {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, 207 }; 208 209 for (size_t i = 0; i < ARRAYSIZE(resolve_cases); i++) { 210 // 8-bit code path. 211 GURL input(resolve_cases[i].base); 212 GURL output = input.Resolve(resolve_cases[i].relative); 213 EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i; 214 EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i; 215 EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL); 216 217 // Wide code path. 218 GURL inputw(ConvertUTF8ToUTF16(resolve_cases[i].base)); 219 GURL outputw = 220 input.Resolve(ConvertUTF8ToUTF16(resolve_cases[i].relative)); 221 EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i; 222 EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i; 223 EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL); 224 } 225 } 226 227 TEST(GURLTest, GetOrigin) { 228 struct TestCase { 229 const char* input; 230 const char* expected; 231 } cases[] = { 232 {"http://www.google.com", "http://www.google.com/"}, 233 {"javascript:window.alert(\"hello,world\");", ""}, 234 {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"}, 235 {"http://user@www.google.com", "http://www.google.com/"}, 236 {"http://:pass@www.google.com", "http://www.google.com/"}, 237 {"http://:@www.google.com", "http://www.google.com/"}, 238 {"filesystem:http://www.google.com/temp/foo?q#b", "http://www.google.com/"}, 239 {"filesystem:http://user:pass@google.com:21/blah#baz", "http://google.com:21/"}, 240 }; 241 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 242 GURL url(cases[i].input); 243 GURL origin = url.GetOrigin(); 244 EXPECT_EQ(cases[i].expected, origin.spec()); 245 } 246 } 247 248 TEST(GURLTest, GetWithEmptyPath) { 249 struct TestCase { 250 const char* input; 251 const char* expected; 252 } cases[] = { 253 {"http://www.google.com", "http://www.google.com/"}, 254 {"javascript:window.alert(\"hello, world\");", ""}, 255 {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"}, 256 {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"}, 257 {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"}, 258 }; 259 260 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 261 GURL url(cases[i].input); 262 GURL empty_path = url.GetWithEmptyPath(); 263 EXPECT_EQ(cases[i].expected, empty_path.spec()); 264 } 265 } 266 267 TEST(GURLTest, Replacements) { 268 // The url canonicalizer replacement test will handle most of these case. 269 // The most important thing to do here is to check that the proper 270 // canonicalizer gets called based on the scheme of the input. 271 struct ReplaceCase { 272 const char* base; 273 const char* scheme; 274 const char* username; 275 const char* password; 276 const char* host; 277 const char* port; 278 const char* path; 279 const char* query; 280 const char* ref; 281 const char* expected; 282 } replace_cases[] = { 283 {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "http://www.google.com/"}, 284 {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", "", "window.open('foo');", "", "", "javascript:window.open('foo');"}, 285 {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", "/foo","search", "ref", "http://www.google.com:99/foo?search#ref"}, 286 #ifdef WIN32 287 {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", "c:\\", "", "", "file:///C:/"}, 288 #endif 289 {"filesystem:http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, NULL, "/", "", "", "filesystem:http://www.google.com/foo/"}, 290 }; 291 292 for (size_t i = 0; i < ARRAYSIZE(replace_cases); i++) { 293 const ReplaceCase& cur = replace_cases[i]; 294 GURL url(cur.base); 295 GURL::Replacements repl; 296 SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme); 297 SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username); 298 SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password); 299 SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host); 300 SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port); 301 SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path); 302 SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query); 303 SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref); 304 GURL output = url.ReplaceComponents(repl); 305 306 EXPECT_EQ(replace_cases[i].expected, output.spec()); 307 EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL); 308 } 309 } 310 311 TEST(GURLTest, ClearFragmentOnDataUrl) { 312 // http://crbug.com/291747 - a data URL may legitimately have trailing 313 // whitespace in the spec after the ref is cleared. Test this does not trigger 314 // the url_parse::Parsed importing validation DCHECK in GURL. 315 GURL url(" data: one ? two # three "); 316 317 // By default the trailing whitespace will have been stripped. 318 EXPECT_EQ("data: one ? two # three", url.spec()); 319 GURL::Replacements repl; 320 repl.ClearRef(); 321 GURL url_no_ref = url.ReplaceComponents(repl); 322 323 EXPECT_EQ("data: one ? two ", url_no_ref.spec()); 324 325 // Importing a parsed url via this constructor overload will retain trailing 326 // whitespace. 327 GURL import_url(url_no_ref.spec(), 328 url_no_ref.parsed_for_possibly_invalid_spec(), 329 url_no_ref.is_valid()); 330 EXPECT_EQ(url_no_ref, import_url); 331 EXPECT_EQ(import_url.query(), " two "); 332 333 } 334 335 TEST(GURLTest, PathForRequest) { 336 struct TestCase { 337 const char* input; 338 const char* expected; 339 const char* inner_expected; 340 } cases[] = { 341 {"http://www.google.com", "/", NULL}, 342 {"http://www.google.com/", "/", NULL}, 343 {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", NULL}, 344 {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", NULL}, 345 {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", NULL}, 346 {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", "/foo/bar.html?query", "/temporary"}, 347 {"filesystem:http://www.google.com/temporary/foo/bar.html?query", "/foo/bar.html?query", "/temporary"}, 348 }; 349 350 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 351 GURL url(cases[i].input); 352 std::string path_request = url.PathForRequest(); 353 EXPECT_EQ(cases[i].expected, path_request); 354 EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL); 355 if (url.inner_url() && cases[i].inner_expected) 356 EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest()); 357 } 358 } 359 360 TEST(GURLTest, EffectiveIntPort) { 361 struct PortTest { 362 const char* spec; 363 int expected_int_port; 364 } port_tests[] = { 365 // http 366 {"http://www.google.com/", 80}, 367 {"http://www.google.com:80/", 80}, 368 {"http://www.google.com:443/", 443}, 369 370 // https 371 {"https://www.google.com/", 443}, 372 {"https://www.google.com:443/", 443}, 373 {"https://www.google.com:80/", 80}, 374 375 // ftp 376 {"ftp://www.google.com/", 21}, 377 {"ftp://www.google.com:21/", 21}, 378 {"ftp://www.google.com:80/", 80}, 379 380 // gopher 381 {"gopher://www.google.com/", 70}, 382 {"gopher://www.google.com:70/", 70}, 383 {"gopher://www.google.com:80/", 80}, 384 385 // file - no port 386 {"file://www.google.com/", url_parse::PORT_UNSPECIFIED}, 387 {"file://www.google.com:443/", url_parse::PORT_UNSPECIFIED}, 388 389 // data - no port 390 {"data:www.google.com:90", url_parse::PORT_UNSPECIFIED}, 391 {"data:www.google.com", url_parse::PORT_UNSPECIFIED}, 392 393 // filesystem - no port 394 {"filesystem:http://www.google.com:90/t/foo", url_parse::PORT_UNSPECIFIED}, 395 {"filesystem:file:///t/foo", url_parse::PORT_UNSPECIFIED}, 396 }; 397 398 for (size_t i = 0; i < ARRAYSIZE(port_tests); i++) { 399 GURL url(port_tests[i].spec); 400 EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort()); 401 } 402 } 403 404 TEST(GURLTest, IPAddress) { 405 struct IPTest { 406 const char* spec; 407 bool expected_ip; 408 } ip_tests[] = { 409 {"http://www.google.com/", false}, 410 {"http://192.168.9.1/", true}, 411 {"http://192.168.9.1.2/", false}, 412 {"http://192.168.m.1/", false}, 413 {"http://2001:db8::1/", false}, 414 {"http://[2001:db8::1]/", true}, 415 {"", false}, 416 {"some random input!", false}, 417 }; 418 419 for (size_t i = 0; i < ARRAYSIZE(ip_tests); i++) { 420 GURL url(ip_tests[i].spec); 421 EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress()); 422 } 423 } 424 425 TEST(GURLTest, HostNoBrackets) { 426 struct TestCase { 427 const char* input; 428 const char* expected_host; 429 const char* expected_plainhost; 430 } cases[] = { 431 {"http://www.google.com", "www.google.com", "www.google.com"}, 432 {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"}, 433 {"http://[::]/", "[::]", "::"}, 434 435 // Don't require a valid URL, but don't crash either. 436 {"http://[]/", "[]", ""}, 437 {"http://[x]/", "[x]", "x"}, 438 {"http://[x/", "[x", "[x"}, 439 {"http://x]/", "x]", "x]"}, 440 {"http://[/", "[", "["}, 441 {"http://]/", "]", "]"}, 442 {"", "", ""}, 443 }; 444 for (size_t i = 0; i < ARRAYSIZE(cases); i++) { 445 GURL url(cases[i].input); 446 EXPECT_EQ(cases[i].expected_host, url.host()); 447 EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets()); 448 } 449 } 450 451 TEST(GURLTest, DomainIs) { 452 const char google_domain[] = "google.com"; 453 454 GURL url_1("http://www.google.com:99/foo"); 455 EXPECT_TRUE(url_1.DomainIs(google_domain)); 456 457 GURL url_2("http://google.com:99/foo"); 458 EXPECT_TRUE(url_2.DomainIs(google_domain)); 459 460 GURL url_3("http://google.com./foo"); 461 EXPECT_TRUE(url_3.DomainIs(google_domain)); 462 463 GURL url_4("http://google.com/foo"); 464 EXPECT_FALSE(url_4.DomainIs("google.com.")); 465 466 GURL url_5("http://google.com./foo"); 467 EXPECT_TRUE(url_5.DomainIs("google.com.")); 468 469 GURL url_6("http://www.google.com./foo"); 470 EXPECT_TRUE(url_6.DomainIs(".com.")); 471 472 GURL url_7("http://www.balabala.com/foo"); 473 EXPECT_FALSE(url_7.DomainIs(google_domain)); 474 475 GURL url_8("http://www.google.com.cn/foo"); 476 EXPECT_FALSE(url_8.DomainIs(google_domain)); 477 478 GURL url_9("http://www.iamnotgoogle.com/foo"); 479 EXPECT_FALSE(url_9.DomainIs(google_domain)); 480 481 GURL url_10("http://www.iamnotgoogle.com../foo"); 482 EXPECT_FALSE(url_10.DomainIs(".com")); 483 484 GURL url_11("filesystem:http://www.google.com:99/foo/"); 485 EXPECT_TRUE(url_11.DomainIs(google_domain)); 486 487 GURL url_12("filesystem:http://www.iamnotgoogle.com/foo/"); 488 EXPECT_FALSE(url_12.DomainIs(google_domain)); 489 } 490 491 // Newlines should be stripped from inputs. 492 TEST(GURLTest, Newlines) { 493 // Constructor. 494 GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n "); 495 EXPECT_EQ("http://www.google.com/asdf", url_1.spec()); 496 497 // Relative path resolver. 498 GURL url_2 = url_1.Resolve(" \n /fo\to\r "); 499 EXPECT_EQ("http://www.google.com/foo", url_2.spec()); 500 501 // Note that newlines are NOT stripped from ReplaceComponents. 502 } 503 504 TEST(GURLTest, IsStandard) { 505 GURL a("http:foo/bar"); 506 EXPECT_TRUE(a.IsStandard()); 507 508 GURL b("foo:bar/baz"); 509 EXPECT_FALSE(b.IsStandard()); 510 511 GURL c("foo://bar/baz"); 512 EXPECT_FALSE(c.IsStandard()); 513 } 514