// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/base/net_util.h"

#include <string.h>

#include <vector>

#include "base/format_macros.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"

using base::ASCIIToUTF16;
using base::WideToUTF16;

namespace net {

namespace {

// Shorthand for base::string16::npos, used when spelling out tables of
// expected offsets.
static const size_t kNpos = base::string16::npos;

// Language lists that every IDN test case is evaluated against.  The position
// of an entry here is also the column index into
// IDNTestCase::unicode_allowed, and the IDNToUnicodeFast/Slow tests refer to
// entries by index (3 == "ja", 17 == "zh-TW,en", 18 == "ko,ja"), so the order
// must stay stable.
const char* kLanguages[] = {
  "",      "en",    "zh-CN",    "ja",    "ko",
  "he",    "ar",    "ru",       "el",    "fr",
  "de",    "pt",    "sv",       "th",    "hi",
  "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
  "zh,ru,en"
};

// One IDN conversion scenario.
//   input:           host name handed to IDNToUnicode().
//   unicode_output:  expected result when the Unicode form is allowed.
//   unicode_allowed: one flag per kLanguages entry; when false the tests
//                    expect |input| to come back unchanged.
struct IDNTestCase {
  const char* input;
  const wchar_t* unicode_output;
  const bool unicode_allowed[arraysize(kLanguages)];
};

// TODO(jungshik) This is just a random sample of languages and is far
// from exhaustive.  We may have to generate all the combinations
// of languages (powerset of a set of all the languages).
// Table of IDN inputs, their Unicode form, and for which kLanguages entries
// the Unicode form is expected to be shown.
// NOTE: several entries below supply only 20 of the 21 flags.  The omitted
// last element (the "zh,ru,en" column) is value-initialized, i.e. false.
const IDNTestCase idn_cases[] = {
    // No IDN
    {"www.google.com", L"www.google.com",
     {true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true}},
    {"www.google.com.", L"www.google.com.",
     {true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true}},
    {".", L".",
     {true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true}},
    {"", L"",
     {true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true}},
    // IDN
    // Hanzi (Traditional Chinese)
    {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
     {true,  false, true,  true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, true,  true,  false,
      true}},
    // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
    {"xn--cy2a840a.com", L"\x89c6\x9891.com",
     {true,  false, true,  false,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false,  false,
      true}},
    // Hanzi + '123'
    {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
     {true,  false, true,  true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, true,  true,  false,
      true}},
    // Hanzi + Latin : U+56FD is simplified and is regarded
    // as not supported in zh-TW.
    {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
     {false, false, true,  true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      true}},
    // Kanji + Kana (Japanese)
    {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
     {true,  false, false, true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      false}},
    // Katakana including U+30FC
    // (20 flags only; the last column defaults to false.)
    {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
     {true,  false, false, true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      }},
    // (20 flags only; the last column defaults to false.)
    {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
     {true,  false, false, true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      }},
    // Katakana + Latin (Japanese)
    // TODO(jungshik): Change 'false' in the first element to 'true'
    // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
    // of our IsIDNComponentInSingleScript().
    // (20 flags only; the last column defaults to false.)
    {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
     {false, false, false, true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      }},
    // (20 flags only; the last column defaults to false.)
    {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
     {false, false, false, true,  false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      }},
    // Hangul (Korean)
    {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
     {true,  false, false, false, true,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      false}},
    // b<u-umlaut>cher (German)
    {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
     {true,  false, false, false, false,
      false, false, false, false, true,
      true,  false, false, false, false,
      true,  false, false, false, false,
      false}},
    // a with diaeresis
    {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
     {true,  false, false, false, false,
      false, false, false, false, false,
      true,  false, true,  false, false,
      true,  false, false, false, false,
      false}},
    // c-cedilla (French)
    {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
     {true,  false, false, false, false,
      false, false, false, false, true,
      false, true,  false, false, false,
      false, false, false, false, false,
      false}},
    // caf'e with acute accent' (French)
    {"xn--caf-dma.fr", L"caf\x00e9.fr",
     {true,  false, false, false, false,
      false, false, false, false, true,
      false, true,  true,  false, false,
      false, false, false, false, false,
      false}},
    // c-cedilla and a with tilde (Portuguese)
    {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
     {true,  false, false, false, false,
      false, false, false, false, false,
      false, true,  false, false, false,
      false, false, false, false, false,
      false}},
    // s with caron
    {"xn--achy-f6a.com", L"\x0161" L"achy.com",
     {true,  false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // TODO(jungshik) : Add examples with Cyrillic letters
    // only used in some languages written in Cyrillic.
    // Eutopia (Greek)
    {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
     {true,  false, false, false, false,
      false, false, false, true,  false,
      false, false, false, false, false,
      false, true,  false, false, false,
      false}},
    // Eutopia + 123 (Greek)
    {"xn---123-pldm0haj2bk.gr",
     L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
     {true,  false, false, false, false,
      false, false, false, true,  false,
      false, false, false, false, false,
      false, true,  false, false, false,
      false}},
    // Cyrillic (Russian)
    {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
     {true,  false, false, false, false,
      false, false, true,  false, false,
      false, false, false, false, false,
      false, false, false, false, true,
      true}},
    // Cyrillic + 123 (Russian)
    {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
     {true,  false, false, false, false,
      false, false, true,  false, false,
      false, false, false, false, false,
      false, false, false, false, true,
      true}},
    // Arabic
    {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
     {true,  false, false, false, false,
      false, true,  false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // Hebrew
    {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
     {true,  false, false, false, false,
      true,  false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, true,
      false}},
    // Thai
    {"xn--12c2cc4ag3b4ccu.th",
     L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
     {true,  false, false, false, false,
      false, false, false, false, false,
      false, false, false, true,  false,
      false, false, false, false, false,
      false}},
    // Devanagari (Hindi)
    {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
     {true,  false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, true,
      false, false, false, false, false,
      false}},
    // Invalid IDN
    // (unicode_output is NULL; every flag is false, so the tests only ever
    // compare against |input| for this entry.)
    {"xn--hello?world.com", NULL,
     {false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // Unsafe IDNs
    // "payp<alpha>l.com"
    {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
     {false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // google.gr with Greek omicron and epsilon
    {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
     {false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // google.ru with Cyrillic o
    {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
     {false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // h<e with acute>llo<China in Han>.cn
    {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
     {false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // <Greek rho><Cyrillic a><Cyrillic u>.ru
    {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
     {false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false, false, false, false, false,
      false}},
    // One that's really long that will force a buffer realloc
    {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
         "aaaaaaa",
     L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
         L"aaaaaaaa",
     {true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true,  true,  true,  true,  true,
      true}},
    // Test cases for characters we blacklisted although allowed in IDN.
    // Embedded spaces will be turned to %20 in the display.
    // TODO(jungshik): We need to have more cases. This is a typical
    // data-driven trap. The following test cases need to be separated
    // and tested only for a couple of languages.
    {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
      {false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false}},
    // (20 flags only; the last column defaults to false.)
    {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
      {false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
      }},
    // (20 flags only; the last column defaults to false.)
    {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
      {false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
      }},
    // (20 flags only; the last column defaults to false.)
    {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
      {false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
      }},
    // (20 flags only; the last column defaults to false.)
    {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
      {false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, false, false, false,
      }},
#if 0
    // These two cases are special. We need a separate test.
    // U+3000 and U+3002 are normalized to ASCII space and dot.
    {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
      {false, false, true,  false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, true,  false, false,
       true}},
    {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
      {false, false, true,  false, false,
       false, false, false, false, false,
       false, false, false, false, false,
       false, false, true,  false, false,
       true}},
#endif
};

// Pairs an offset into an input string with the offset it is expected to map
// to in the output string.
struct AdjustOffsetCase {
  size_t input_offset;
  size_t output_offset;
};

// One FormatUrl() scenario: the arguments to pass and the expected results.
struct UrlTestData {
  const char* description;  // Streamed into the failure message.
  const char* input;        // URL spec that is turned into a GURL.
  const char* languages;
  FormatUrlTypes format_types;
  UnescapeRule::Type escape_rules;
  const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
  size_t prefix_len;      // Expected value of FormatUrl()'s prefix output.
};

// A helper for IDN*{Fast,Slow}.
// Append "::<language list>" to |expected| and |actual| to make it
// easy to tell which sub-case fails without debugging.
// Both strings receive the identical suffix, so whether they compare equal is
// not affected.
void AppendLanguagesToOutputs(const char* languages,
                              base::string16* expected,
                              base::string16* actual) {
  base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
  expected->append(to_append);
  actual->append(to_append);
}

// A pair of helpers for the FormatUrlWithOffsets() test.
378 void VerboseExpect(size_t expected, 379 size_t actual, 380 const std::string& original_url, 381 size_t position, 382 const base::string16& formatted_url) { 383 EXPECT_EQ(expected, actual) << "Original URL: " << original_url 384 << " (at char " << position << ")\nFormatted URL: " << formatted_url; 385 } 386 387 void CheckAdjustedOffsets(const std::string& url_string, 388 const std::string& languages, 389 FormatUrlTypes format_types, 390 UnescapeRule::Type unescape_rules, 391 const size_t* output_offsets) { 392 GURL url(url_string); 393 size_t url_length = url_string.length(); 394 std::vector<size_t> offsets; 395 for (size_t i = 0; i <= url_length + 1; ++i) 396 offsets.push_back(i); 397 offsets.push_back(500000); // Something larger than any input length. 398 offsets.push_back(std::string::npos); 399 base::string16 formatted_url = FormatUrlWithOffsets(url, languages, 400 format_types, unescape_rules, NULL, NULL, &offsets); 401 for (size_t i = 0; i < url_length; ++i) 402 VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url); 403 VerboseExpect(formatted_url.length(), offsets[url_length], url_string, 404 url_length, formatted_url); 405 VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string, 406 500000, formatted_url); 407 VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string, 408 std::string::npos, formatted_url); 409 } 410 411 } // anonymous namespace 412 413 TEST(NetUtilTest, IDNToUnicodeFast) { 414 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { 415 for (size_t j = 0; j < arraysize(kLanguages); j++) { 416 // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow 417 if (j == 3 || j == 17 || j == 18) 418 continue; 419 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); 420 base::string16 expected(idn_cases[i].unicode_allowed[j] ? 
421 WideToUTF16(idn_cases[i].unicode_output) : 422 ASCIIToUTF16(idn_cases[i].input)); 423 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); 424 EXPECT_EQ(expected, output); 425 } 426 } 427 } 428 429 TEST(NetUtilTest, IDNToUnicodeSlow) { 430 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) { 431 for (size_t j = 0; j < arraysize(kLanguages); j++) { 432 // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast 433 if (!(j == 3 || j == 17 || j == 18)) 434 continue; 435 base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j])); 436 base::string16 expected(idn_cases[i].unicode_allowed[j] ? 437 WideToUTF16(idn_cases[i].unicode_output) : 438 ASCIIToUTF16(idn_cases[i].input)); 439 AppendLanguagesToOutputs(kLanguages[j], &expected, &output); 440 EXPECT_EQ(expected, output); 441 } 442 } 443 } 444 445 TEST(NetUtilTest, StripWWW) { 446 EXPECT_EQ(base::string16(), StripWWW(base::string16())); 447 EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www."))); 448 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah"))); 449 EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); 450 } 451 452 // This is currently a windows specific function. 
453 #if defined(OS_WIN) 454 namespace { 455 456 struct GetDirectoryListingEntryCase { 457 const wchar_t* name; 458 const char* raw_bytes; 459 bool is_dir; 460 int64 filesize; 461 base::Time time; 462 const char* expected; 463 }; 464 465 } // namespace 466 467 TEST(NetUtilTest, GetDirectoryListingEntry) { 468 const GetDirectoryListingEntryCase test_cases[] = { 469 {L"Foo", 470 "", 471 false, 472 10000, 473 base::Time(), 474 "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"}, 475 {L"quo\"tes", 476 "", 477 false, 478 10000, 479 base::Time(), 480 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" 481 "\n"}, 482 {L"quo\"tes", 483 "quo\"tes", 484 false, 485 10000, 486 base::Time(), 487 "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>" 488 "\n"}, 489 // U+D55C0 U+AE00. raw_bytes is empty (either a local file with 490 // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8 491 {L"\xD55C\xAE00.txt", 492 "", 493 false, 494 10000, 495 base::Time(), 496 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\"," 497 "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"}, 498 // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence: 499 // a local or remote file in EUC-KR. 
500 {L"\xD55C\xAE00.txt", 501 "\xC7\xD1\xB1\xDB.txt", 502 false, 503 10000, 504 base::Time(), 505 "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\"" 506 ",0,\"9.8 kB\",\"\");</script>\n"}, 507 }; 508 509 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 510 const std::string results = GetDirectoryListingEntry( 511 WideToUTF16(test_cases[i].name), 512 test_cases[i].raw_bytes, 513 test_cases[i].is_dir, 514 test_cases[i].filesize, 515 test_cases[i].time); 516 EXPECT_EQ(test_cases[i].expected, results); 517 } 518 } 519 520 #endif 521 522 TEST(NetUtilTest, FormatUrl) { 523 FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword; 524 const UrlTestData tests[] = { 525 {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0}, 526 527 {"Simple URL", 528 "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL, 529 L"http://www.google.com/", 7}, 530 531 {"With a port number and a reference", 532 "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type, 533 UnescapeRule::NORMAL, 534 L"http://www.google.com:8080/#\x30B0", 7}, 535 536 // -------- IDN tests -------- 537 {"Japanese IDN with ja", 538 "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, 539 UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, 540 541 {"Japanese IDN with en", 542 "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type, 543 UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7}, 544 545 {"Japanese IDN without any languages", 546 "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type, 547 UnescapeRule::NORMAL, 548 // Single script is safe for empty languages. 549 L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7}, 550 551 {"mailto: with Japanese IDN", 552 "mailto:foo (at) xn--l8jvb1ey91xtjb.jp", "ja", default_format_type, 553 UnescapeRule::NORMAL, 554 // GURL doesn't assume an email address's domain part as a host name. 
555 L"mailto:foo (at) xn--l8jvb1ey91xtjb.jp", 7}, 556 557 {"file: with Japanese IDN", 558 "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, 559 UnescapeRule::NORMAL, 560 L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7}, 561 562 {"ftp: with Japanese IDN", 563 "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type, 564 UnescapeRule::NORMAL, 565 L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6}, 566 567 // -------- omit_username_password flag tests -------- 568 {"With username and password, omit_username_password=false", 569 "http://user:passwd@example.com/foo", "", 570 kFormatUrlOmitNothing, UnescapeRule::NORMAL, 571 L"http://user:passwd@example.com/foo", 19}, 572 573 {"With username and password, omit_username_password=true", 574 "http://user:passwd@example.com/foo", "", default_format_type, 575 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, 576 577 {"With username and no password", 578 "http://user@example.com/foo", "", default_format_type, 579 UnescapeRule::NORMAL, L"http://example.com/foo", 7}, 580 581 {"Just '@' without username and password", 582 "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL, 583 L"http://example.com/foo", 7}, 584 585 // GURL doesn't think local-part of an email address is username for URL. 586 {"mailto:, omit_username_password=true", 587 "mailto:foo (at) example.com", "", default_format_type, UnescapeRule::NORMAL, 588 L"mailto:foo (at) example.com", 7}, 589 590 // -------- unescape flag tests -------- 591 {"Do not unescape", 592 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 593 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 594 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, 595 UnescapeRule::NONE, 596 // GURL parses %-encoded hostnames into Punycode. 
597 L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 598 L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7}, 599 600 {"Unescape normally", 601 "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/" 602 "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB" 603 "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type, 604 UnescapeRule::NORMAL, 605 L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB" 606 L"?q=\x30B0\x30FC\x30B0\x30EB", 7}, 607 608 {"Unescape normally with BiDi control character", 609 "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type, 610 UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7}, 611 612 {"Unescape normally including unescape spaces", 613 "http://www.google.com/search?q=Hello%20World", "en", default_format_type, 614 UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7}, 615 616 /* 617 {"unescape=true with some special characters", 618 "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "", 619 kFormatUrlOmitNothing, UnescapeRule::NORMAL, 620 L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25}, 621 */ 622 // Disabled: the resultant URL becomes "...user%253A:%2540passwd...". 
623 624 // -------- omit http: -------- 625 {"omit http with user name", 626 "http://user@example.com/foo", "", kFormatUrlOmitAll, 627 UnescapeRule::NORMAL, L"example.com/foo", 0}, 628 629 {"omit http", 630 "http://www.google.com/", "en", kFormatUrlOmitHTTP, 631 UnescapeRule::NORMAL, L"www.google.com/", 632 0}, 633 634 {"omit http with https", 635 "https://www.google.com/", "en", kFormatUrlOmitHTTP, 636 UnescapeRule::NORMAL, L"https://www.google.com/", 637 8}, 638 639 {"omit http starts with ftp.", 640 "http://ftp.google.com/", "en", kFormatUrlOmitHTTP, 641 UnescapeRule::NORMAL, L"http://ftp.google.com/", 642 7}, 643 644 // -------- omit trailing slash on bare hostname -------- 645 {"omit slash when it's the entire path", 646 "http://www.google.com/", "en", 647 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, 648 L"http://www.google.com", 7}, 649 {"omit slash when there's a ref", 650 "http://www.google.com/#ref", "en", 651 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, 652 L"http://www.google.com/#ref", 7}, 653 {"omit slash when there's a query", 654 "http://www.google.com/?", "en", 655 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, 656 L"http://www.google.com/?", 7}, 657 {"omit slash when it's not the entire path", 658 "http://www.google.com/foo", "en", 659 kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL, 660 L"http://www.google.com/foo", 7}, 661 {"omit slash for nonstandard URLs", 662 "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname, 663 UnescapeRule::NORMAL, L"data:/", 5}, 664 {"omit slash for file URLs", 665 "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname, 666 UnescapeRule::NORMAL, L"file:///", 7}, 667 668 // -------- view-source: -------- 669 {"view-source", 670 "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type, 671 UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/", 672 19}, 673 674 {"view-source of view-source", 675 
"view-source:view-source:http://xn--qcka1pmc.jp/", "ja", 676 default_format_type, UnescapeRule::NORMAL, 677 L"view-source:view-source:http://xn--qcka1pmc.jp/", 12}, 678 679 // view-source should omit http and trailing slash where non-view-source 680 // would. 681 {"view-source omit http", 682 "view-source:http://a.b/c", "en", kFormatUrlOmitAll, 683 UnescapeRule::NORMAL, L"view-source:a.b/c", 684 12}, 685 {"view-source omit http starts with ftp.", 686 "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll, 687 UnescapeRule::NORMAL, L"view-source:http://ftp.b/c", 688 19}, 689 {"view-source omit slash when it's the entire path", 690 "view-source:http://a.b/", "en", kFormatUrlOmitAll, 691 UnescapeRule::NORMAL, L"view-source:a.b", 692 12}, 693 }; 694 695 for (size_t i = 0; i < arraysize(tests); ++i) { 696 size_t prefix_len; 697 base::string16 formatted = FormatUrl( 698 GURL(tests[i].input), tests[i].languages, tests[i].format_types, 699 tests[i].escape_rules, NULL, &prefix_len, NULL); 700 EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description; 701 EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description; 702 } 703 } 704 705 TEST(NetUtilTest, FormatUrlParsed) { 706 // No unescape case. 
707 url::Parsed parsed; 708 base::string16 formatted = FormatUrl( 709 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 710 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 711 "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL, 712 NULL); 713 EXPECT_EQ(WideToUTF16( 714 L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 715 L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted); 716 EXPECT_EQ(WideToUTF16(L"%E3%82%B0"), 717 formatted.substr(parsed.username.begin, parsed.username.len)); 718 EXPECT_EQ(WideToUTF16(L"%E3%83%BC"), 719 formatted.substr(parsed.password.begin, parsed.password.len)); 720 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 721 formatted.substr(parsed.host.begin, parsed.host.len)); 722 EXPECT_EQ(WideToUTF16(L"8080"), 723 formatted.substr(parsed.port.begin, parsed.port.len)); 724 EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"), 725 formatted.substr(parsed.path.begin, parsed.path.len)); 726 EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"), 727 formatted.substr(parsed.query.begin, parsed.query.len)); 728 EXPECT_EQ(WideToUTF16(L"\x30B0"), 729 formatted.substr(parsed.ref.begin, parsed.ref.len)); 730 731 // Unescape case. 
732 formatted = FormatUrl( 733 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 734 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 735 "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL, 736 NULL); 737 EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080" 738 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); 739 EXPECT_EQ(WideToUTF16(L"\x30B0"), 740 formatted.substr(parsed.username.begin, parsed.username.len)); 741 EXPECT_EQ(WideToUTF16(L"\x30FC"), 742 formatted.substr(parsed.password.begin, parsed.password.len)); 743 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 744 formatted.substr(parsed.host.begin, parsed.host.len)); 745 EXPECT_EQ(WideToUTF16(L"8080"), 746 formatted.substr(parsed.port.begin, parsed.port.len)); 747 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), 748 formatted.substr(parsed.path.begin, parsed.path.len)); 749 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), 750 formatted.substr(parsed.query.begin, parsed.query.len)); 751 EXPECT_EQ(WideToUTF16(L"\x30B0"), 752 formatted.substr(parsed.ref.begin, parsed.ref.len)); 753 754 // Omit_username_password + unescape case. 
755 formatted = FormatUrl( 756 GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/" 757 "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"), 758 "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed, 759 NULL, NULL); 760 EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080" 761 L"/\x30B0/?q=\x30B0#\x30B0"), formatted); 762 EXPECT_FALSE(parsed.username.is_valid()); 763 EXPECT_FALSE(parsed.password.is_valid()); 764 EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"), 765 formatted.substr(parsed.host.begin, parsed.host.len)); 766 EXPECT_EQ(WideToUTF16(L"8080"), 767 formatted.substr(parsed.port.begin, parsed.port.len)); 768 EXPECT_EQ(WideToUTF16(L"/\x30B0/"), 769 formatted.substr(parsed.path.begin, parsed.path.len)); 770 EXPECT_EQ(WideToUTF16(L"q=\x30B0"), 771 formatted.substr(parsed.query.begin, parsed.query.len)); 772 EXPECT_EQ(WideToUTF16(L"\x30B0"), 773 formatted.substr(parsed.ref.begin, parsed.ref.len)); 774 775 // View-source case. 776 formatted = 777 FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"), 778 std::string(), 779 kFormatUrlOmitUsernamePassword, 780 UnescapeRule::NORMAL, 781 &parsed, 782 NULL, 783 NULL); 784 EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"), 785 formatted); 786 EXPECT_EQ(WideToUTF16(L"view-source:http"), 787 formatted.substr(parsed.scheme.begin, parsed.scheme.len)); 788 EXPECT_FALSE(parsed.username.is_valid()); 789 EXPECT_FALSE(parsed.password.is_valid()); 790 EXPECT_EQ(WideToUTF16(L"host"), 791 formatted.substr(parsed.host.begin, parsed.host.len)); 792 EXPECT_EQ(WideToUTF16(L"81"), 793 formatted.substr(parsed.port.begin, parsed.port.len)); 794 EXPECT_EQ(WideToUTF16(L"/path"), 795 formatted.substr(parsed.path.begin, parsed.path.len)); 796 EXPECT_EQ(WideToUTF16(L"query"), 797 formatted.substr(parsed.query.begin, parsed.query.len)); 798 EXPECT_EQ(WideToUTF16(L"ref"), 799 formatted.substr(parsed.ref.begin, parsed.ref.len)); 800 801 // omit http case. 
802 formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"), 803 std::string(), 804 kFormatUrlOmitHTTP, 805 UnescapeRule::NORMAL, 806 &parsed, 807 NULL, 808 NULL); 809 EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted); 810 EXPECT_FALSE(parsed.scheme.is_valid()); 811 EXPECT_FALSE(parsed.username.is_valid()); 812 EXPECT_FALSE(parsed.password.is_valid()); 813 EXPECT_EQ(WideToUTF16(L"host"), 814 formatted.substr(parsed.host.begin, parsed.host.len)); 815 EXPECT_EQ(WideToUTF16(L"8000"), 816 formatted.substr(parsed.port.begin, parsed.port.len)); 817 EXPECT_EQ(WideToUTF16(L"/a"), 818 formatted.substr(parsed.path.begin, parsed.path.len)); 819 EXPECT_EQ(WideToUTF16(L"b=c"), 820 formatted.substr(parsed.query.begin, parsed.query.len)); 821 EXPECT_EQ(WideToUTF16(L"d"), 822 formatted.substr(parsed.ref.begin, parsed.ref.len)); 823 824 // omit http starts with ftp case. 825 formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"), 826 std::string(), 827 kFormatUrlOmitHTTP, 828 UnescapeRule::NORMAL, 829 &parsed, 830 NULL, 831 NULL); 832 EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted); 833 EXPECT_TRUE(parsed.scheme.is_valid()); 834 EXPECT_FALSE(parsed.username.is_valid()); 835 EXPECT_FALSE(parsed.password.is_valid()); 836 EXPECT_EQ(WideToUTF16(L"http"), 837 formatted.substr(parsed.scheme.begin, parsed.scheme.len)); 838 EXPECT_EQ(WideToUTF16(L"ftp.host"), 839 formatted.substr(parsed.host.begin, parsed.host.len)); 840 EXPECT_EQ(WideToUTF16(L"8000"), 841 formatted.substr(parsed.port.begin, parsed.port.len)); 842 EXPECT_EQ(WideToUTF16(L"/a"), 843 formatted.substr(parsed.path.begin, parsed.path.len)); 844 EXPECT_EQ(WideToUTF16(L"b=c"), 845 formatted.substr(parsed.query.begin, parsed.query.len)); 846 EXPECT_EQ(WideToUTF16(L"d"), 847 formatted.substr(parsed.ref.begin, parsed.ref.len)); 848 849 // omit http starts with 'f' case. 
850 formatted = FormatUrl(GURL("http://f/"), 851 std::string(), 852 kFormatUrlOmitHTTP, 853 UnescapeRule::NORMAL, 854 &parsed, 855 NULL, 856 NULL); 857 EXPECT_EQ(WideToUTF16(L"f/"), formatted); 858 EXPECT_FALSE(parsed.scheme.is_valid()); 859 EXPECT_FALSE(parsed.username.is_valid()); 860 EXPECT_FALSE(parsed.password.is_valid()); 861 EXPECT_FALSE(parsed.port.is_valid()); 862 EXPECT_TRUE(parsed.path.is_valid()); 863 EXPECT_FALSE(parsed.query.is_valid()); 864 EXPECT_FALSE(parsed.ref.is_valid()); 865 EXPECT_EQ(WideToUTF16(L"f"), 866 formatted.substr(parsed.host.begin, parsed.host.len)); 867 EXPECT_EQ(WideToUTF16(L"/"), 868 formatted.substr(parsed.path.begin, parsed.path.len)); 869 } 870 871 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL 872 // results in the original GURL, for each ASCII character in the path. 873 TEST(NetUtilTest, FormatUrlRoundTripPathASCII) { 874 for (unsigned char test_char = 32; test_char < 128; ++test_char) { 875 GURL url(std::string("http://www.google.com/") + 876 static_cast<char>(test_char)); 877 size_t prefix_len; 878 base::string16 formatted = FormatUrl(url, 879 std::string(), 880 kFormatUrlOmitUsernamePassword, 881 UnescapeRule::NORMAL, 882 NULL, 883 &prefix_len, 884 NULL); 885 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 886 } 887 } 888 889 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL 890 // results in the original GURL, for each escaped ASCII character in the path. 
891 TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) { 892 for (unsigned char test_char = 32; test_char < 128; ++test_char) { 893 std::string original_url("http://www.google.com/"); 894 original_url.push_back('%'); 895 original_url.append(base::HexEncode(&test_char, 1)); 896 897 GURL url(original_url); 898 size_t prefix_len; 899 base::string16 formatted = FormatUrl(url, 900 std::string(), 901 kFormatUrlOmitUsernamePassword, 902 UnescapeRule::NORMAL, 903 NULL, 904 &prefix_len, 905 NULL); 906 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 907 } 908 } 909 910 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL 911 // results in the original GURL, for each ASCII character in the query. 912 TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) { 913 for (unsigned char test_char = 32; test_char < 128; ++test_char) { 914 GURL url(std::string("http://www.google.com/?") + 915 static_cast<char>(test_char)); 916 size_t prefix_len; 917 base::string16 formatted = FormatUrl(url, 918 std::string(), 919 kFormatUrlOmitUsernamePassword, 920 UnescapeRule::NORMAL, 921 NULL, 922 &prefix_len, 923 NULL); 924 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 925 } 926 } 927 928 // Make sure that calling FormatUrl on a GURL and then converting back to a GURL 929 // only results in a different GURL for certain characters. 930 TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) { 931 // A full list of characters which FormatURL should unescape and GURL should 932 // not escape again, when they appear in a query string. 
933 const char* kUnescapedCharacters = 934 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~"; 935 for (unsigned char test_char = 0; test_char < 128; ++test_char) { 936 std::string original_url("http://www.google.com/?"); 937 original_url.push_back('%'); 938 original_url.append(base::HexEncode(&test_char, 1)); 939 940 GURL url(original_url); 941 size_t prefix_len; 942 base::string16 formatted = FormatUrl(url, 943 std::string(), 944 kFormatUrlOmitUsernamePassword, 945 UnescapeRule::NORMAL, 946 NULL, 947 &prefix_len, 948 NULL); 949 950 if (test_char && 951 strchr(kUnescapedCharacters, static_cast<char>(test_char))) { 952 EXPECT_NE(url.spec(), GURL(formatted).spec()); 953 } else { 954 EXPECT_EQ(url.spec(), GURL(formatted).spec()); 955 } 956 } 957 } 958 959 TEST(NetUtilTest, FormatUrlWithOffsets) { 960 CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing, 961 UnescapeRule::NORMAL, NULL); 962 963 const size_t basic_offsets[] = { 964 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 965 21, 22, 23, 24, 25 966 }; 967 CheckAdjustedOffsets("http://www.google.com/foo/", "en", 968 kFormatUrlOmitNothing, UnescapeRule::NORMAL, 969 basic_offsets); 970 971 const size_t omit_auth_offsets_1[] = { 972 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 973 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 974 }; 975 CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en", 976 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, 977 omit_auth_offsets_1); 978 979 const size_t omit_auth_offsets_2[] = { 980 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14, 981 15, 16, 17, 18, 19, 20, 21 982 }; 983 CheckAdjustedOffsets("http://foo@www.google.com/", "en", 984 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, 985 omit_auth_offsets_2); 986 987 const size_t dont_omit_auth_offsets[] = { 988 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 989 kNpos, kNpos, 
11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 990 kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 991 30, 31 992 }; 993 // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com". 994 CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en", 995 kFormatUrlOmitNothing, UnescapeRule::NORMAL, 996 dont_omit_auth_offsets); 997 998 const size_t view_source_offsets[] = { 999 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos, 1000 kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33 1001 }; 1002 CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en", 1003 kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, 1004 view_source_offsets); 1005 1006 const size_t idn_hostname_offsets_1[] = { 1007 0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 1008 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 1009 13, 14, 15, 16, 17, 18, 19 1010 }; 1011 // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/". 1012 CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja", 1013 kFormatUrlOmitNothing, UnescapeRule::NORMAL, 1014 idn_hostname_offsets_1); 1015 1016 const size_t idn_hostname_offsets_2[] = { 1017 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, 1018 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos, 1019 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 1020 kNpos, 19, 20, 21, 22, 23, 24 1021 }; 1022 // Convert punycode to 1023 // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/". 
1024 CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/", 1025 "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL, 1026 idn_hostname_offsets_2); 1027 1028 const size_t unescape_offsets[] = { 1029 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1030 21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos, 1031 kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos, 1032 kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 1033 kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos 1034 }; 1035 // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB". 1036 CheckAdjustedOffsets( 1037 "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 1038 "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets); 1039 1040 const size_t ref_offsets[] = { 1041 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 1042 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos, 1043 33 1044 }; 1045 // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z". 
1046 CheckAdjustedOffsets( 1047 "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en", 1048 kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets); 1049 1050 const size_t omit_http_offsets[] = { 1051 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1052 10, 11, 12, 13, 14 1053 }; 1054 CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP, 1055 UnescapeRule::NORMAL, omit_http_offsets); 1056 1057 const size_t omit_http_start_with_ftp_offsets[] = { 1058 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 1059 }; 1060 CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP, 1061 UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets); 1062 1063 const size_t omit_all_offsets[] = { 1064 0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos, 1065 0, 1, 2, 3, 4, 5, 6, 7 1066 }; 1067 CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll, 1068 UnescapeRule::NORMAL, omit_all_offsets); 1069 } 1070 1071 } // namespace net 1072