Home | History | Annotate | Download | only in base
      1 // Copyright 2014 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/base/net_util.h"
      6 
      7 #include <string.h>
      8 
      9 #include <vector>
     10 
     11 #include "base/format_macros.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/utf_string_conversions.h"
     14 #include "base/time/time.h"
     15 #include "testing/gtest/include/gtest/gtest.h"
     16 #include "url/gurl.h"
     17 
     18 using base::ASCIIToUTF16;
     19 using base::WideToUTF16;
     20 
     21 namespace net {
     22 
     23 namespace {
     24 
     25 static const size_t kNpos = base::string16::npos;
     26 
     27 const char* kLanguages[] = {
     28   "",      "en",    "zh-CN",    "ja",    "ko",
     29   "he",    "ar",    "ru",       "el",    "fr",
     30   "de",    "pt",    "sv",       "th",    "hi",
     31   "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
     32   "zh,ru,en"
     33 };
     34 
     35 struct IDNTestCase {
     36   const char* input;
     37   const wchar_t* unicode_output;
     38   const bool unicode_allowed[arraysize(kLanguages)];
     39 };
     40 
     41 // TODO(jungshik) This is just a random sample of languages and is far
     42 // from exhaustive.  We may have to generate all the combinations
     43 // of languages (powerset of a set of all the languages).
     44 const IDNTestCase idn_cases[] = {
     45   // No IDN
     46   {"www.google.com", L"www.google.com",
     47    {true,  true,  true,  true,  true,
     48     true,  true,  true,  true,  true,
     49     true,  true,  true,  true,  true,
     50     true,  true,  true,  true,  true,
     51     true}},
     52   {"www.google.com.", L"www.google.com.",
     53    {true,  true,  true,  true,  true,
     54     true,  true,  true,  true,  true,
     55     true,  true,  true,  true,  true,
     56     true,  true,  true,  true,  true,
     57     true}},
     58   {".", L".",
     59    {true,  true,  true,  true,  true,
     60     true,  true,  true,  true,  true,
     61     true,  true,  true,  true,  true,
     62     true,  true,  true,  true,  true,
     63     true}},
     64   {"", L"",
     65    {true,  true,  true,  true,  true,
     66     true,  true,  true,  true,  true,
     67     true,  true,  true,  true,  true,
     68     true,  true,  true,  true,  true,
     69     true}},
     70   // IDN
     71   // Hanzi (Traditional Chinese)
     72   {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
     73    {true,  false, true,  true,  false,
     74     false, false, false, false, false,
     75     false, false, false, false, false,
     76     false, false, true,  true,  false,
     77     true}},
     78   // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
     79   {"xn--cy2a840a.com", L"\x89c6\x9891.com",
     80    {true,  false, true,  false,  false,
     81     false, false, false, false, false,
     82     false, false, false, false, false,
     83     false, false, false, false,  false,
     84     true}},
     85   // Hanzi + '123'
     86   {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
     87    {true,  false, true,  true,  false,
     88     false, false, false, false, false,
     89     false, false, false, false, false,
     90     false, false, true,  true,  false,
     91     true}},
     92   // Hanzi + Latin : U+56FD is simplified and is regarded
     93   // as not supported in zh-TW.
     94   {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
     95    {false, false, true,  true,  false,
     96     false, false, false, false, false,
     97     false, false, false, false, false,
     98     false, false, false, true,  false,
     99     true}},
    100   // Kanji + Kana (Japanese)
    101   {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
    102    {true,  false, false, true,  false,
    103     false, false, false, false, false,
    104     false, false, false, false, false,
    105     false, false, false, true,  false,
    106     false}},
    107   // Katakana including U+30FC
    108   {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
    109    {true, false, false, true,  false,
    110     false, false, false, false, false,
    111     false, false, false, false, false,
    112     false, false, false, true, false,
    113     }},
    114   {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
    115    {true, false, false, true,  false,
    116     false, false, false, false, false,
    117     false, false, false, false, false,
    118     false, false, false, true, false,
    119     }},
    120   // Katakana + Latin (Japanese)
    121   // TODO(jungshik): Change 'false' in the first element to 'true'
    122   // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
    123   // of our IsIDNComponentInSingleScript().
    124   {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
    125    {false, false, false, true,  false,
    126     false, false, false, false, false,
    127     false, false, false, false, false,
    128     false, false, false, true, false,
    129     }},
    130   {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
    131    {false, false, false, true,  false,
    132     false, false, false, false, false,
    133     false, false, false, false, false,
    134     false, false, false, true, false,
    135     }},
    136   // Hangul (Korean)
    137   {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
    138    {true,  false, false, false, true,
    139     false, false, false, false, false,
    140     false, false, false, false, false,
    141     false, false, false, true,  false,
    142     false}},
    143   // b<u-umlaut>cher (German)
    144   {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
    145    {true,  false, false, false, false,
    146     false, false, false, false, true,
    147     true,  false,  false, false, false,
    148     true,  false, false, false, false,
    149     false}},
    150   // a with diaeresis
    151   {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
    152    {true,  false, false, false, false,
    153     false, false, false, false, false,
    154     true,  false, true, false, false,
    155     true,  false, false, false, false,
    156     false}},
    157   // c-cedilla (French)
    158   {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
    159    {true,  false, false, false, false,
    160     false, false, false, false, true,
    161     false, true,  false, false, false,
    162     false, false, false, false, false,
    163     false}},
    164   // caf'e with acute accent' (French)
    165   {"xn--caf-dma.fr", L"caf\x00e9.fr",
    166    {true,  false, false, false, false,
    167     false, false, false, false, true,
    168     false, true,  true,  false, false,
    169     false, false, false, false, false,
    170     false}},
    171   // c-cedillla and a with tilde (Portuguese)
    172   {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
    173    {true,  false, false, false, false,
    174     false, false, false, false, false,
    175     false, true,  false, false, false,
    176     false, false, false, false, false,
    177     false}},
    178   // s with caron
    179   {"xn--achy-f6a.com", L"\x0161" L"achy.com",
    180    {true,  false, false, false, false,
    181     false, false, false, false, false,
    182     false, false, false, false, false,
    183     false, false, false, false, false,
    184     false}},
    185   // TODO(jungshik) : Add examples with Cyrillic letters
    186   // only used in some languages written in Cyrillic.
    187   // Eutopia (Greek)
    188   {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
    189    {true,  false, false, false, false,
    190     false, false, false, true,  false,
    191     false, false, false, false, false,
    192     false, true,  false, false, false,
    193     false}},
    194   // Eutopia + 123 (Greek)
    195   {"xn---123-pldm0haj2bk.gr",
    196    L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
    197    {true,  false, false, false, false,
    198     false, false, false, true,  false,
    199     false, false, false, false, false,
    200     false, true,  false, false, false,
    201     false}},
    202   // Cyrillic (Russian)
    203   {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
    204    {true,  false, false, false, false,
    205     false, false, true,  false, false,
    206     false, false, false, false, false,
    207     false, false, false, false, true,
    208     true}},
    209   // Cyrillic + 123 (Russian)
    210   {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
    211    {true,  false, false, false, false,
    212     false, false, true,  false, false,
    213     false, false, false, false, false,
    214     false, false, false, false, true,
    215     true}},
    216   // Arabic
    217   {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
    218    {true,  false, false, false, false,
    219     false, true,  false, false, false,
    220     false, false, false, false, false,
    221     false, false, false, false, false,
    222     false}},
    223   // Hebrew
    224   {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
    225    {true,  false, false, false, false,
    226     true,  false, false, false, false,
    227     false, false, false, false, false,
    228     false, false, false, false, true,
    229     false}},
    230   // Thai
    231   {"xn--12c2cc4ag3b4ccu.th",
    232    L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
    233    {true,  false, false, false, false,
    234     false, false, false, false, false,
    235     false, false, false, true,  false,
    236     false, false, false, false, false,
    237     false}},
    238   // Devangari (Hindi)
    239   {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
    240    {true,  false, false, false, false,
    241     false, false, false, false, false,
    242     false, false, false, false, true,
    243     false, false, false, false, false,
    244     false}},
    245   // Invalid IDN
    246   {"xn--hello?world.com", NULL,
    247    {false, false, false, false, false,
    248     false, false, false, false, false,
    249     false, false, false, false, false,
    250     false, false, false, false, false,
    251     false}},
    252   // Unsafe IDNs
    253   // "payp<alpha>l.com"
    254   {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
    255    {false, false, false, false, false,
    256     false, false, false, false, false,
    257     false, false, false, false, false,
    258     false, false, false, false, false,
    259     false}},
    260   // google.gr with Greek omicron and epsilon
    261   {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
    262    {false, false, false, false, false,
    263     false, false, false, false, false,
    264     false, false, false, false, false,
    265     false, false, false, false, false,
    266     false}},
    267   // google.ru with Cyrillic o
    268   {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
    269    {false, false, false, false, false,
    270     false, false, false, false, false,
    271     false, false, false, false, false,
    272     false, false, false, false, false,
    273     false}},
    274   // h<e with acute>llo<China in Han>.cn
    275   {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
    276    {false, false, false, false, false,
    277     false, false, false, false, false,
    278     false, false, false, false, false,
    279     false, false, false, false, false,
    280     false}},
    281   // <Greek rho><Cyrillic a><Cyrillic u>.ru
    282   {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
    283    {false, false, false, false, false,
    284     false, false, false, false, false,
    285     false, false, false, false, false,
    286     false, false, false, false, false,
    287     false}},
    288   // One that's really long that will force a buffer realloc
    289   {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    290        "aaaaaaa",
    291    L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    292        L"aaaaaaaa",
    293    {true,  true,  true,  true,  true,
    294     true,  true,  true,  true,  true,
    295     true,  true,  true,  true,  true,
    296     true,  true,  true,  true,  true,
    297     true}},
    298   // Test cases for characters we blacklisted although allowed in IDN.
    299   // Embedded spaces will be turned to %20 in the display.
    300   // TODO(jungshik): We need to have more cases. This is a typical
    301   // data-driven trap. The following test cases need to be separated
    302   // and tested only for a couple of languages.
    303   {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
    304     {false, false, false, false, false,
    305      false, false, false, false, false,
    306      false, false, false, false, false,
    307      false, false, false, false, false,
    308      false}},
    309   {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
    310     {false, false, false, false, false,
    311      false, false, false, false, false,
    312      false, false, false, false, false,
    313      false, false, false, false, false,
    314   }},
    315   {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
    316     {false, false, false, false, false,
    317      false, false, false, false, false,
    318      false, false, false, false, false,
    319      false, false, false, false, false,
    320   }},
    321   {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
    322     {false, false, false, false, false,
    323      false, false, false, false, false,
    324      false, false, false, false, false,
    325      false, false, false, false, false,
    326   }},
    327   {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
    328     {false, false, false, false, false,
    329      false, false, false, false, false,
    330      false, false, false, false, false,
    331      false, false, false, false, false,
    332   }},
    333 #if 0
    334   // These two cases are special. We need a separate test.
    335   // U+3000 and U+3002 are normalized to ASCII space and dot.
    336   {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
    337     {false, false, true,  false, false,
    338      false, false, false, false, false,
    339      false, false, false, false, false,
    340      false, false, true,  false, false,
    341      true}},
    342   {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
    343     {false, false, true,  false, false,
    344      false, false, false, false, false,
    345      false, false, false, false, false,
    346      false, false, true,  false, false,
    347      true}},
    348 #endif
    349 };
    350 
    351 struct AdjustOffsetCase {
    352   size_t input_offset;
    353   size_t output_offset;
    354 };
    355 
    356 struct UrlTestData {
    357   const char* description;
    358   const char* input;
    359   const char* languages;
    360   FormatUrlTypes format_types;
    361   UnescapeRule::Type escape_rules;
    362   const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
    363   size_t prefix_len;
    364 };
    365 
    366 // A helper for IDN*{Fast,Slow}.
    367 // Append "::<language list>" to |expected| and |actual| to make it
    368 // easy to tell which sub-case fails without debugging.
    369 void AppendLanguagesToOutputs(const char* languages,
    370                               base::string16* expected,
    371                               base::string16* actual) {
    372   base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
    373   expected->append(to_append);
    374   actual->append(to_append);
    375 }
    376 
    377 // A pair of helpers for the FormatUrlWithOffsets() test.
    378 void VerboseExpect(size_t expected,
    379                    size_t actual,
    380                    const std::string& original_url,
    381                    size_t position,
    382                    const base::string16& formatted_url) {
    383   EXPECT_EQ(expected, actual) << "Original URL: " << original_url
    384       << " (at char " << position << ")\nFormatted URL: " << formatted_url;
    385 }
    386 
    387 void CheckAdjustedOffsets(const std::string& url_string,
    388                           const std::string& languages,
    389                           FormatUrlTypes format_types,
    390                           UnescapeRule::Type unescape_rules,
    391                           const size_t* output_offsets) {
    392   GURL url(url_string);
    393   size_t url_length = url_string.length();
    394   std::vector<size_t> offsets;
    395   for (size_t i = 0; i <= url_length + 1; ++i)
    396     offsets.push_back(i);
    397   offsets.push_back(500000);  // Something larger than any input length.
    398   offsets.push_back(std::string::npos);
    399   base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
    400       format_types, unescape_rules, NULL, NULL, &offsets);
    401   for (size_t i = 0; i < url_length; ++i)
    402     VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
    403   VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
    404                 url_length, formatted_url);
    405   VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
    406                 500000, formatted_url);
    407   VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
    408                 std::string::npos, formatted_url);
    409 }
    410 
    411 }  // anonymous namespace
    412 
    413 TEST(NetUtilTest, IDNToUnicodeFast) {
    414   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
    415     for (size_t j = 0; j < arraysize(kLanguages); j++) {
    416       // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
    417       if (j == 3 || j == 17 || j == 18)
    418         continue;
    419       base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
    420       base::string16 expected(idn_cases[i].unicode_allowed[j] ?
    421           WideToUTF16(idn_cases[i].unicode_output) :
    422           ASCIIToUTF16(idn_cases[i].input));
    423       AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
    424       EXPECT_EQ(expected, output);
    425     }
    426   }
    427 }
    428 
    429 TEST(NetUtilTest, IDNToUnicodeSlow) {
    430   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
    431     for (size_t j = 0; j < arraysize(kLanguages); j++) {
    432       // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
    433       if (!(j == 3 || j == 17 || j == 18))
    434         continue;
    435       base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
    436       base::string16 expected(idn_cases[i].unicode_allowed[j] ?
    437           WideToUTF16(idn_cases[i].unicode_output) :
    438           ASCIIToUTF16(idn_cases[i].input));
    439       AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
    440       EXPECT_EQ(expected, output);
    441     }
    442   }
    443 }
    444 
    445 TEST(NetUtilTest, StripWWW) {
    446   EXPECT_EQ(base::string16(), StripWWW(base::string16()));
    447   EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www.")));
    448   EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
    449   EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
    450 }
    451 
    452 // This is currently a windows specific function.
    453 #if defined(OS_WIN)
    454 namespace {
    455 
    456 struct GetDirectoryListingEntryCase {
    457   const wchar_t* name;
    458   const char* raw_bytes;
    459   bool is_dir;
    460   int64 filesize;
    461   base::Time time;
    462   const char* expected;
    463 };
    464 
    465 }  // namespace
    466 
    467 TEST(NetUtilTest, GetDirectoryListingEntry) {
    468   const GetDirectoryListingEntryCase test_cases[] = {
    469     {L"Foo",
    470      "",
    471      false,
    472      10000,
    473      base::Time(),
    474      "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
    475     {L"quo\"tes",
    476      "",
    477      false,
    478      10000,
    479      base::Time(),
    480      "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
    481          "\n"},
    482     {L"quo\"tes",
    483      "quo\"tes",
    484      false,
    485      10000,
    486      base::Time(),
    487      "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
    488          "\n"},
    489     // U+D55C0 U+AE00. raw_bytes is empty (either a local file with
    490     // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8
    491     {L"\xD55C\xAE00.txt",
    492      "",
    493      false,
    494      10000,
    495      base::Time(),
    496      "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\","
    497          "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"},
    498     // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence:
    499     // a local or remote file in EUC-KR.
    500     {L"\xD55C\xAE00.txt",
    501      "\xC7\xD1\xB1\xDB.txt",
    502      false,
    503      10000,
    504      base::Time(),
    505      "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\""
    506          ",0,\"9.8 kB\",\"\");</script>\n"},
    507   };
    508 
    509   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    510     const std::string results = GetDirectoryListingEntry(
    511         WideToUTF16(test_cases[i].name),
    512         test_cases[i].raw_bytes,
    513         test_cases[i].is_dir,
    514         test_cases[i].filesize,
    515         test_cases[i].time);
    516     EXPECT_EQ(test_cases[i].expected, results);
    517   }
    518 }
    519 
    520 #endif
    521 
    522 TEST(NetUtilTest, FormatUrl) {
    523   FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
    524   const UrlTestData tests[] = {
    525     {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
    526 
    527     {"Simple URL",
    528      "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
    529      L"http://www.google.com/", 7},
    530 
    531     {"With a port number and a reference",
    532      "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
    533      UnescapeRule::NORMAL,
    534      L"http://www.google.com:8080/#\x30B0", 7},
    535 
    536     // -------- IDN tests --------
    537     {"Japanese IDN with ja",
    538      "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
    539      UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
    540 
    541     {"Japanese IDN with en",
    542      "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
    543      UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
    544 
    545     {"Japanese IDN without any languages",
    546      "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
    547      UnescapeRule::NORMAL,
    548      // Single script is safe for empty languages.
    549      L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
    550 
    551     {"mailto: with Japanese IDN",
    552      "mailto:foo (at) xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
    553      UnescapeRule::NORMAL,
    554      // GURL doesn't assume an email address's domain part as a host name.
    555      L"mailto:foo (at) xn--l8jvb1ey91xtjb.jp", 7},
    556 
    557     {"file: with Japanese IDN",
    558      "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
    559      UnescapeRule::NORMAL,
    560      L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
    561 
    562     {"ftp: with Japanese IDN",
    563      "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
    564      UnescapeRule::NORMAL,
    565      L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
    566 
    567     // -------- omit_username_password flag tests --------
    568     {"With username and password, omit_username_password=false",
    569      "http://user:passwd@example.com/foo", "",
    570      kFormatUrlOmitNothing, UnescapeRule::NORMAL,
    571      L"http://user:passwd@example.com/foo", 19},
    572 
    573     {"With username and password, omit_username_password=true",
    574      "http://user:passwd@example.com/foo", "", default_format_type,
    575      UnescapeRule::NORMAL, L"http://example.com/foo", 7},
    576 
    577     {"With username and no password",
    578      "http://user@example.com/foo", "", default_format_type,
    579      UnescapeRule::NORMAL, L"http://example.com/foo", 7},
    580 
    581     {"Just '@' without username and password",
    582      "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
    583      L"http://example.com/foo", 7},
    584 
    585     // GURL doesn't think local-part of an email address is username for URL.
    586     {"mailto:, omit_username_password=true",
    587      "mailto:foo (at) example.com", "", default_format_type, UnescapeRule::NORMAL,
    588      L"mailto:foo (at) example.com", 7},
    589 
    590     // -------- unescape flag tests --------
    591     {"Do not unescape",
    592      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
    593      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
    594      "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
    595      UnescapeRule::NONE,
    596      // GURL parses %-encoded hostnames into Punycode.
    597      L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
    598      L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
    599 
    600     {"Unescape normally",
    601      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
    602      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
    603      "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
    604      UnescapeRule::NORMAL,
    605      L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
    606      L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
    607 
    608     {"Unescape normally with BiDi control character",
    609      "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type,
    610      UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
    611 
    612     {"Unescape normally including unescape spaces",
    613      "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
    614      UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
    615 
    616     /*
    617     {"unescape=true with some special characters",
    618     "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
    619     kFormatUrlOmitNothing, UnescapeRule::NORMAL,
    620     L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
    621     */
    622     // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
    623 
    624     // -------- omit http: --------
    625     {"omit http with user name",
    626      "http://user@example.com/foo", "", kFormatUrlOmitAll,
    627      UnescapeRule::NORMAL, L"example.com/foo", 0},
    628 
    629     {"omit http",
    630      "http://www.google.com/", "en", kFormatUrlOmitHTTP,
    631      UnescapeRule::NORMAL, L"www.google.com/",
    632      0},
    633 
    634     {"omit http with https",
    635      "https://www.google.com/", "en", kFormatUrlOmitHTTP,
    636      UnescapeRule::NORMAL, L"https://www.google.com/",
    637      8},
    638 
    639     {"omit http starts with ftp.",
    640      "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
    641      UnescapeRule::NORMAL, L"http://ftp.google.com/",
    642      7},
    643 
    644     // -------- omit trailing slash on bare hostname --------
    645     {"omit slash when it's the entire path",
    646      "http://www.google.com/", "en",
    647      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
    648      L"http://www.google.com", 7},
    649     {"omit slash when there's a ref",
    650      "http://www.google.com/#ref", "en",
    651      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
    652      L"http://www.google.com/#ref", 7},
    653     {"omit slash when there's a query",
    654      "http://www.google.com/?", "en",
    655      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
    656      L"http://www.google.com/?", 7},
    657     {"omit slash when it's not the entire path",
    658      "http://www.google.com/foo", "en",
    659      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
    660      L"http://www.google.com/foo", 7},
    661     {"omit slash for nonstandard URLs",
    662      "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
    663      UnescapeRule::NORMAL, L"data:/", 5},
    664     {"omit slash for file URLs",
    665      "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
    666      UnescapeRule::NORMAL, L"file:///", 7},
    667 
    668     // -------- view-source: --------
    669     {"view-source",
    670      "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
    671      UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
    672      19},
    673 
    674     {"view-source of view-source",
    675      "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
    676      default_format_type, UnescapeRule::NORMAL,
    677      L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
    678 
    679     // view-source should omit http and trailing slash where non-view-source
    680     // would.
    681     {"view-source omit http",
    682      "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
    683      UnescapeRule::NORMAL, L"view-source:a.b/c",
    684      12},
    685     {"view-source omit http starts with ftp.",
    686      "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
    687      UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
    688      19},
    689     {"view-source omit slash when it's the entire path",
    690      "view-source:http://a.b/", "en", kFormatUrlOmitAll,
    691      UnescapeRule::NORMAL, L"view-source:a.b",
    692      12},
    693   };
    694 
    695   for (size_t i = 0; i < arraysize(tests); ++i) {
    696     size_t prefix_len;
    697     base::string16 formatted = FormatUrl(
    698         GURL(tests[i].input), tests[i].languages, tests[i].format_types,
    699         tests[i].escape_rules, NULL, &prefix_len, NULL);
    700     EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
    701     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
    702   }
    703 }
    704 
    705 TEST(NetUtilTest, FormatUrlParsed) {
          // Exercises the url::Parsed out-parameter of FormatUrl. In every case
          // below, a component range taken from |parsed| is used to slice
          // |formatted| (the string FormatUrl returned) and the slice is compared
          // with the expected component text, or the component is expected to be
          // invalid. |parsed| and |formatted| are reused from case to case.

    706   // No unescape case.
          // With UnescapeRule::NONE the %-escapes in the path and query are expected
          // to stay escaped. The expected string also shows the username and
          // password percent-encoded and the punycode host rendered as Unicode.
    707   url::Parsed parsed;
    708   base::string16 formatted = FormatUrl(
    709       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
    710            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
    711       "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
    712       NULL);
    713   EXPECT_EQ(WideToUTF16(
    714       L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
    715       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
    716   EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
    717       formatted.substr(parsed.username.begin, parsed.username.len));
    718   EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
    719       formatted.substr(parsed.password.begin, parsed.password.len));
    720   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
    721       formatted.substr(parsed.host.begin, parsed.host.len));
    722   EXPECT_EQ(WideToUTF16(L"8080"),
    723       formatted.substr(parsed.port.begin, parsed.port.len));
    724   EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
    725       formatted.substr(parsed.path.begin, parsed.path.len));
    726   EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
    727       formatted.substr(parsed.query.begin, parsed.query.len));
    728   EXPECT_EQ(WideToUTF16(L"\x30B0"),
    729       formatted.substr(parsed.ref.begin, parsed.ref.len));
    730
    731   // Unescape case.
          // Same URL with UnescapeRule::NORMAL: every escaped component is expected
          // to appear as Unicode characters, and the component ranges must describe
          // the resulting (shorter) string.
    732   formatted = FormatUrl(
    733       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
    734            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
    735       "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
    736       NULL);
    737   EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
    738       L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
    739   EXPECT_EQ(WideToUTF16(L"\x30B0"),
    740       formatted.substr(parsed.username.begin, parsed.username.len));
    741   EXPECT_EQ(WideToUTF16(L"\x30FC"),
    742       formatted.substr(parsed.password.begin, parsed.password.len));
    743   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
    744       formatted.substr(parsed.host.begin, parsed.host.len));
    745   EXPECT_EQ(WideToUTF16(L"8080"),
    746       formatted.substr(parsed.port.begin, parsed.port.len));
    747   EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
    748       formatted.substr(parsed.path.begin, parsed.path.len));
    749   EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
    750       formatted.substr(parsed.query.begin, parsed.query.len));
    751   EXPECT_EQ(WideToUTF16(L"\x30B0"),
    752       formatted.substr(parsed.ref.begin, parsed.ref.len));
    753
    754   // Omit_username_password + unescape case.
          // With kFormatUrlOmitUsernamePassword the credentials are expected to be
          // absent from the output and both components invalid in |parsed|.
    755   formatted = FormatUrl(
    756       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
    757            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
    758       "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
    759       NULL, NULL);
    760   EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
    761       L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
    762   EXPECT_FALSE(parsed.username.is_valid());
    763   EXPECT_FALSE(parsed.password.is_valid());
    764   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
    765       formatted.substr(parsed.host.begin, parsed.host.len));
    766   EXPECT_EQ(WideToUTF16(L"8080"),
    767       formatted.substr(parsed.port.begin, parsed.port.len));
    768   EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
    769       formatted.substr(parsed.path.begin, parsed.path.len));
    770   EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
    771       formatted.substr(parsed.query.begin, parsed.query.len));
    772   EXPECT_EQ(WideToUTF16(L"\x30B0"),
    773       formatted.substr(parsed.ref.begin, parsed.ref.len));
    774
    775   // View-source case.
          // The scheme component is expected to span the whole "view-source:http"
          // prefix; the remaining components describe the inner URL.
    776   formatted =
    777       FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
    778                 std::string(),
    779                 kFormatUrlOmitUsernamePassword,
    780                 UnescapeRule::NORMAL,
    781                 &parsed,
    782                 NULL,
    783                 NULL);
    784   EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
    785       formatted);
    786   EXPECT_EQ(WideToUTF16(L"view-source:http"),
    787       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
    788   EXPECT_FALSE(parsed.username.is_valid());
    789   EXPECT_FALSE(parsed.password.is_valid());
    790   EXPECT_EQ(WideToUTF16(L"host"),
    791       formatted.substr(parsed.host.begin, parsed.host.len));
    792   EXPECT_EQ(WideToUTF16(L"81"),
    793       formatted.substr(parsed.port.begin, parsed.port.len));
    794   EXPECT_EQ(WideToUTF16(L"/path"),
    795       formatted.substr(parsed.path.begin, parsed.path.len));
    796   EXPECT_EQ(WideToUTF16(L"query"),
    797       formatted.substr(parsed.query.begin, parsed.query.len));
    798   EXPECT_EQ(WideToUTF16(L"ref"),
    799       formatted.substr(parsed.ref.begin, parsed.ref.len));
    800
    801   // omit http case.
          // With kFormatUrlOmitHTTP the "http://" prefix is expected to be dropped,
          // which leaves no valid scheme component.
    802   formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"),
    803                         std::string(),
    804                         kFormatUrlOmitHTTP,
    805                         UnescapeRule::NORMAL,
    806                         &parsed,
    807                         NULL,
    808                         NULL);
    809   EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
    810   EXPECT_FALSE(parsed.scheme.is_valid());
    811   EXPECT_FALSE(parsed.username.is_valid());
    812   EXPECT_FALSE(parsed.password.is_valid());
    813   EXPECT_EQ(WideToUTF16(L"host"),
    814       formatted.substr(parsed.host.begin, parsed.host.len));
    815   EXPECT_EQ(WideToUTF16(L"8000"),
    816       formatted.substr(parsed.port.begin, parsed.port.len));
    817   EXPECT_EQ(WideToUTF16(L"/a"),
    818       formatted.substr(parsed.path.begin, parsed.path.len));
    819   EXPECT_EQ(WideToUTF16(L"b=c"),
    820       formatted.substr(parsed.query.begin, parsed.query.len));
    821   EXPECT_EQ(WideToUTF16(L"d"),
    822       formatted.substr(parsed.ref.begin, parsed.ref.len));
    823
    824   // omit http starts with ftp case.
          // The expected output keeps "http://" when the host begins with "ftp.",
          // so the scheme component stays valid.
          // NOTE(review): presumably this avoids the shortened text being mistaken
          // for an FTP URL - confirm against FormatUrl.
    825   formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"),
    826                         std::string(),
    827                         kFormatUrlOmitHTTP,
    828                         UnescapeRule::NORMAL,
    829                         &parsed,
    830                         NULL,
    831                         NULL);
    832   EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
    833   EXPECT_TRUE(parsed.scheme.is_valid());
    834   EXPECT_FALSE(parsed.username.is_valid());
    835   EXPECT_FALSE(parsed.password.is_valid());
    836   EXPECT_EQ(WideToUTF16(L"http"),
    837       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
    838   EXPECT_EQ(WideToUTF16(L"ftp.host"),
    839       formatted.substr(parsed.host.begin, parsed.host.len));
    840   EXPECT_EQ(WideToUTF16(L"8000"),
    841       formatted.substr(parsed.port.begin, parsed.port.len));
    842   EXPECT_EQ(WideToUTF16(L"/a"),
    843       formatted.substr(parsed.path.begin, parsed.path.len));
    844   EXPECT_EQ(WideToUTF16(L"b=c"),
    845       formatted.substr(parsed.query.begin, parsed.query.len));
    846   EXPECT_EQ(WideToUTF16(L"d"),
    847       formatted.substr(parsed.ref.begin, parsed.ref.len));
    848
    849   // omit http starts with 'f' case.
          // A host that merely starts with 'f' must still have "http://" omitted.
          // Every component other than host and path is expected to be invalid.
    850   formatted = FormatUrl(GURL("http://f/"),
    851                         std::string(),
    852                         kFormatUrlOmitHTTP,
    853                         UnescapeRule::NORMAL,
    854                         &parsed,
    855                         NULL,
    856                         NULL);
    857   EXPECT_EQ(WideToUTF16(L"f/"), formatted);
    858   EXPECT_FALSE(parsed.scheme.is_valid());
    859   EXPECT_FALSE(parsed.username.is_valid());
    860   EXPECT_FALSE(parsed.password.is_valid());
    861   EXPECT_FALSE(parsed.port.is_valid());
    862   EXPECT_TRUE(parsed.path.is_valid());
    863   EXPECT_FALSE(parsed.query.is_valid());
    864   EXPECT_FALSE(parsed.ref.is_valid());
    865   EXPECT_EQ(WideToUTF16(L"f"),
    866       formatted.substr(parsed.host.begin, parsed.host.len));
    867   EXPECT_EQ(WideToUTF16(L"/"),
    868       formatted.substr(parsed.path.begin, parsed.path.len));
    869 }
    870 
    871 // Round-trip check: for every ASCII character from 32 through 127 placed in
    872 // the path, parsing FormatUrl's output must yield the original GURL's spec.
    873 TEST(NetUtilTest, FormatUrlRoundTripPathASCII) {
    874   const std::string kPathPrefix("http://www.google.com/");
    875   for (unsigned char code = 32; code < 128; ++code) {
    876     std::string spec(kPathPrefix);
    877     spec.push_back(static_cast<char>(code));
    878     const GURL source_url(spec);
    879
    880     size_t unused_prefix_end;
    881     const base::string16 display = FormatUrl(
    882         source_url, std::string(), kFormatUrlOmitUsernamePassword,
    883         UnescapeRule::NORMAL, NULL, &unused_prefix_end, NULL);
    884
    885     EXPECT_EQ(source_url.spec(), GURL(display).spec());
    886   }
    887 }
    888 
    889 // Round-trip check: for every ASCII character from 32 through 127 placed in
    890 // the path in %-escaped form, the re-parsed FormatUrl output must be unchanged.
    891 TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) {
    892   const std::string kEscapePrefix("http://www.google.com/%");
    893   unsigned char code = 32;
    894   while (code < 128) {
    895     // The byte is appended as its hex encoding, right after the '%'.
    896     const GURL escaped_url(kEscapePrefix + base::HexEncode(&code, 1));
    897
    898     size_t unused_prefix_end;
    899     const base::string16 display = FormatUrl(
    900         escaped_url, std::string(), kFormatUrlOmitUsernamePassword,
    901         UnescapeRule::NORMAL, NULL, &unused_prefix_end, NULL);
    902
    903     // The display form must parse back to the same canonical spec.
    904     EXPECT_EQ(escaped_url.spec(), GURL(display).spec());
    905
    906     ++code;
    907   }
    908 }
    909 
    910 // Round-trip check: for every ASCII character from 32 through 127 placed in
    911 // the query, parsing FormatUrl's output must yield the original GURL's spec.
    912 TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) {
    913   const std::string kQueryPrefix("http://www.google.com/?");
    914   for (unsigned char code = 32; code < 128; ++code) {
    915     const GURL source_url(kQueryPrefix + static_cast<char>(code));
    916
    917     // The prefix end is requested but deliberately not inspected here.
    918     size_t unused_prefix_end;
    919     const base::string16 display =
    920         FormatUrl(source_url, std::string(),
    921                   kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
    922                   NULL, &unused_prefix_end, NULL);
    923
    924     EXPECT_EQ(source_url.spec(), GURL(display).spec());
    925   }
    926 }
    927 
    928 // For %-escaped characters in the query, the FormatUrl/GURL round trip may
    929 // change the spec, but only for the characters listed in the test body.
    930 TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) {
    931   // Every character that FormatUrl is expected to unescape in a query and
    932   // that GURL then leaves unescaped when the result is parsed again.
    933   const char* kUnescapedCharacters =
    934       "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
    935   const std::string kQueryPrefix("http://www.google.com/?%");
    936
    937   for (unsigned char code = 0; code < 128; ++code) {
    938     const GURL escaped_url(kQueryPrefix + base::HexEncode(&code, 1));
    939
    940     size_t unused_prefix_end;
    941     const base::string16 display = FormatUrl(
    942         escaped_url, std::string(), kFormatUrlOmitUsernamePassword,
    943         UnescapeRule::NORMAL, NULL, &unused_prefix_end, NULL);
    944     const std::string round_tripped = GURL(display).spec();
    945
    946     // strchr() also matches the terminating NUL of its haystack, so code 0
    947     // has to be ruled out explicitly before the lookup.
    948     const bool stays_unescaped =
    949         code != 0 &&
    950         strchr(kUnescapedCharacters, static_cast<char>(code)) != NULL;
    951     if (!stays_unescaped) {
    952       EXPECT_EQ(escaped_url.spec(), round_tripped);
    953     } else {
    954       EXPECT_NE(escaped_url.spec(), round_tripped);
    955     }
    956   }
    957 }
    958 
    959 TEST(NetUtilTest, FormatUrlWithOffsets) {
          // Each case hands a URL, the FormatUrl arguments (languages, format types,
          // unescape rule) and a table of expected offsets to CheckAdjustedOffsets,
          // which is defined outside this part of the file.
          // NOTE(review): presumably entry i of a table is where offset i of the
          // input URL should land in the formatted string, with kNpos marking
          // offsets that have no counterpart - confirm against
          // CheckAdjustedOffsets.

          // Empty URL: no expected-offset table is supplied (NULL).
    960   CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
    961                        UnescapeRule::NORMAL, NULL);
    962
          // Nothing omitted or unescaped: the table is the identity mapping.
    963   const size_t basic_offsets[] = {
    964     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    965     21, 22, 23, 24, 25
    966   };
    967   CheckAdjustedOffsets("http://www.google.com/foo/", "en",
    968                        kFormatUrlOmitNothing, UnescapeRule::NORMAL,
    969                        basic_offsets);
    970
          // "foo:bar@" is omitted: entry 7 stays 7, entries 8-14 are kNpos, and the
          // host resumes at 7.
    971   const size_t omit_auth_offsets_1[] = {
    972     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
    973     8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
    974   };
    975   CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
    976                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
    977                        omit_auth_offsets_1);
    978
          // Username without a password: entries 8-10 are kNpos.
    979   const size_t omit_auth_offsets_2[] = {
    980     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
    981     15, 16, 17, 18, 19, 20, 21
    982   };
    983   CheckAdjustedOffsets("http://foo@www.google.com/", "en",
    984                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
    985                        omit_auth_offsets_2);
    986
          // Credentials are kept but unescaped. Each nine-character escape sequence
          // maps to one output character: its first entry is kept and the other
          // eight are kNpos.
    987   const size_t dont_omit_auth_offsets[] = {
    988     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
    989     kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
    990     kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    991     30, 31
    992   };
    993   // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
    994   CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
    995                        kFormatUrlOmitNothing, UnescapeRule::NORMAL,
    996                        dont_omit_auth_offsets);
    997
          // Same username omission as above, shifted by the "view-source:" prefix.
    998   const size_t view_source_offsets[] = {
    999     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
   1000     kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
   1001   };
   1002   CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
   1003                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1004                        view_source_offsets);
   1005
          // The 18-character punycode label becomes five characters; only the
          // label's first entry is kept, and ".jp" resumes at 12.
   1006   const size_t idn_hostname_offsets_1[] = {
   1007     0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
   1008     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
   1009     13, 14, 15, 16, 17, 18, 19
   1010   };
   1011   // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
   1012   CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
   1013                        kFormatUrlOmitNothing, UnescapeRule::NORMAL,
   1014                        idn_hostname_offsets_1);
   1015
          // Two punycode labels sit between plain ASCII labels; each converted
          // label keeps only its first entry.
   1016   const size_t idn_hostname_offsets_2[] = {
   1017     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
   1018     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
   1019     kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
   1020     kNpos, 19, 20, 21, 22, 23, 24
   1021   };
   1022   // Convert punycode to
   1023   // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
   1024   CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
   1025                        "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL,
   1026                        idn_hostname_offsets_2);
   1027
          // Path unescaping with UnescapeRule::SPACES: "%20" and each
          // nine-character escape sequence keep only their first entry.
   1028   const size_t unescape_offsets[] = {
   1029     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
   1030     21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
   1031     kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
   1032     kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
   1033     kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
   1034   };
   1035   // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
   1036   CheckAdjustedOffsets(
   1037       "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
   1038       "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets);
   1039
          // Raw UTF-8 bytes in the ref: each three-byte sequence keeps its first
          // entry and marks the other two kNpos.
   1040   const size_t ref_offsets[] = {
   1041     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
   1042     21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
   1043     33
   1044   };
   1045   // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
   1046   CheckAdjustedOffsets(
   1047       "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
   1048       kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets);
   1049
          // The seven-character "http://" prefix is omitted: entry 0 stays 0, the
          // rest of the prefix is kNpos, and the host starts again at 0.
   1050   const size_t omit_http_offsets[] = {
   1051     0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
   1052     10, 11, 12, 13, 14
   1053   };
   1054   CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
   1055                        UnescapeRule::NORMAL, omit_http_offsets);
   1056
          // Host starts with "ftp.": the table is the identity mapping, i.e.
          // nothing is expected to be removed.
   1057   const size_t omit_http_start_with_ftp_offsets[] = {
   1058     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
   1059   };
   1060   CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
   1061                        UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets);
   1062
          // kFormatUrlOmitAll: both "http://" and "user@" are expected to be
          // dropped, so the host restarts at 0 after two runs of kNpos.
   1063   const size_t omit_all_offsets[] = {
   1064     0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
   1065     0, 1, 2, 3, 4, 5, 6, 7
   1066   };
   1067   CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
   1068                        UnescapeRule::NORMAL, omit_all_offsets);
   1069 }
   1070 
   1071 }  // namespace net
   1072