Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/base/net_util.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/file_path.h"
     10 #include "base/format_macros.h"
     11 #include "base/string_number_conversions.h"
     12 #include "base/string_util.h"
     13 #include "base/stringprintf.h"
     14 #include "base/sys_string_conversions.h"
     15 #include "base/test/test_file_util.h"
     16 #include "base/time.h"
     17 #include "base/utf_string_conversions.h"
     18 #include "googleurl/src/gurl.h"
     19 #include "net/base/sys_addrinfo.h"
     20 #include "testing/gtest/include/gtest/gtest.h"
     21 
     22 namespace net {
     23 
     24 namespace {
     25 
     26 static const size_t kNpos = string16::npos;  // Shorthand for string16::npos.
     27 
     28 struct FileCase {  // A file path paired with a file: URL (see FileURLConversion).
     29   const wchar_t* file;  // File path, as a wide string.
     30   const char* url;      // URL spec that corresponds to |file|.
     31 };
     32 
     33 struct HeaderCase {  // One GetSpecificHeader lookup and its expected result.
     34   const wchar_t* header_name;  // Header name handed to GetSpecificHeader().
     35   const wchar_t* expected;     // Header value the lookup should return.
     36 };
     37 
     38 struct HeaderParamCase {  // One GetHeaderParamValue lookup and its expected result.
     39   const wchar_t* header_name;  // Header whose value is fetched first.
     40   const wchar_t* param_name;   // Parameter looked up inside that header value.
     41   const wchar_t* expected;     // Parameter value the lookup should return.
     42 };
     43 
     44 struct FileNameCDCase {  // One row of the GetFileNameFromCD test table.
     45   const char* header_field;      // Complete "content-disposition: ..." header line.
     46   const char* referrer_charset;  // Referrer charset for the case; may be "".
     47   const wchar_t* expected;       // File name expected for |header_field|.
     48 };
     49 
     50 const wchar_t* kLanguages[] = {  // Language lists: empty, single, and comma-separated.
          // IDNTestCase::unicode_allowed is sized from this array, so every
          // idn_cases row carries one flag per entry, in this order.
     51   L"",      L"en",    L"zh-CN",       L"ja",    L"ko",
     52   L"he",    L"ar",    L"ru",          L"el",    L"fr",
     53   L"de",    L"pt",    L"sv",          L"th",    L"hi",
     54   L"de,en", L"el,en", L"zh-TW,en",    L"ko,ja", L"he,ru,en",
     55   L"zh,ru,en"
     56 };
     57 
     58 struct IDNTestCase {  // One row of the idn_cases table.
          // Host name in its ASCII form (IDN labels use the "xn--" prefix).
     59   const char* input;
          // Unicode form of |input|; NULL in the "Invalid IDN" row.
     60   const wchar_t* unicode_output;
          // One flag per kLanguages entry, in the same order. Rows that list
          // fewer initializers leave the remaining flags false.
          // NOTE(review): presumably "may the Unicode form be shown for this
          // language list" -- confirm against the IDN tests that consume it.
     61   const bool unicode_allowed[arraysize(kLanguages)];
     62 };
     63 
     64 // TODO(jungshik) This is just a random sample of languages and is far
     65 // from exhaustive.  We may have to generate all the combinations
     66 // of languages (powerset of a set of all the languages).
     67 const IDNTestCase idn_cases[] = {
     68   // No IDN
     69   {"www.google.com", L"www.google.com",
     70    {true,  true,  true,  true,  true,
     71     true,  true,  true,  true,  true,
     72     true,  true,  true,  true,  true,
     73     true,  true,  true,  true,  true,
     74     true}},
     75   {"www.google.com.", L"www.google.com.",
     76    {true,  true,  true,  true,  true,
     77     true,  true,  true,  true,  true,
     78     true,  true,  true,  true,  true,
     79     true,  true,  true,  true,  true,
     80     true}},
     81   {".", L".",
     82    {true,  true,  true,  true,  true,
     83     true,  true,  true,  true,  true,
     84     true,  true,  true,  true,  true,
     85     true,  true,  true,  true,  true,
     86     true}},
     87   {"", L"",
     88    {true,  true,  true,  true,  true,
     89     true,  true,  true,  true,  true,
     90     true,  true,  true,  true,  true,
     91     true,  true,  true,  true,  true,
     92     true}},
     93   // IDN
     94   // Hanzi (Traditional Chinese)
     95   {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
     96    {true,  false, true,  true,  false,
     97     false, false, false, false, false,
     98     false, false, false, false, false,
     99     false, false, true,  true,  false,
    100     true}},
    101   // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
    102   {"xn--cy2a840a.com", L"\x89c6\x9891.com",
    103    {true,  false, true,  false,  false,
    104     false, false, false, false, false,
    105     false, false, false, false, false,
    106     false, false, false, false,  false,
    107     true}},
    108   // Hanzi + '123'
    109   {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
    110    {true,  false, true,  true,  false,
    111     false, false, false, false, false,
    112     false, false, false, false, false,
    113     false, false, true,  true,  false,
    114     true}},
    115   // Hanzi + Latin : U+56FD is simplified and is regarded
    116   // as not supported in zh-TW.
    117   {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
    118    {false, false, true,  true,  false,
    119     false, false, false, false, false,
    120     false, false, false, false, false,
    121     false, false, false, true,  false,
    122     true}},
    123   // Kanji + Kana (Japanese)
    124   {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
    125    {true,  false, false, true,  false,
    126     false, false, false, false, false,
    127     false, false, false, false, false,
    128     false, false, false, true,  false,
    129     false}},
    130   // Katakana including U+30FC
    131   {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
    132    {true, false, false, true,  false,
    133     false, false, false, false, false,
    134     false, false, false, false, false,
    135     false, false, false, true, false,
    136     }},
    137   {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
    138    {true, false, false, true,  false,
    139     false, false, false, false, false,
    140     false, false, false, false, false,
    141     false, false, false, true, false,
    142     }},
    143   // Katakana + Latin (Japanese)
    144   // TODO(jungshik): Change 'false' in the first element to 'true'
    145   // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
    146   // of our IsIDNComponentInSingleScript().
    147   {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
    148    {false, false, false, true,  false,
    149     false, false, false, false, false,
    150     false, false, false, false, false,
    151     false, false, false, true, false,
    152     }},
    153   {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
    154    {false, false, false, true,  false,
    155     false, false, false, false, false,
    156     false, false, false, false, false,
    157     false, false, false, true, false,
    158     }},
    159   // Hangul (Korean)
    160   {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
    161    {true,  false, false, false, true,
    162     false, false, false, false, false,
    163     false, false, false, false, false,
    164     false, false, false, true,  false,
    165     false}},
    166   // b<u-umlaut>cher (German)
    167   {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
    168    {true,  false, false, false, false,
    169     false, false, false, false, true,
    170     true,  false,  false, false, false,
    171     true,  false, false, false, false,
    172     false}},
    173   // a with diaeresis
    174   {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
    175    {true,  false, false, false, false,
    176     false, false, false, false, false,
    177     true,  false, true, false, false,
    178     true,  false, false, false, false,
    179     false}},
    180   // c-cedilla (French)
    181   {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
    182    {true,  false, false, false, false,
    183     false, false, false, false, true,
    184     false, true,  false, false, false,
    185     false, false, false, false, false,
    186     false}},
    187   // caf<e with acute> (French)
    188   {"xn--caf-dma.fr", L"caf\x00e9.fr",
    189    {true,  false, false, false, false,
    190     false, false, false, false, true,
    191     false, true,  true,  false, false,
    192     false, false, false, false, false,
    193     false}},
    194   // c-cedilla and a with tilde (Portuguese)
    195   {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
    196    {true,  false, false, false, false,
    197     false, false, false, false, false,
    198     false, true,  false, false, false,
    199     false, false, false, false, false,
    200     false}},
    201   // s with caron
    202   {"xn--achy-f6a.com", L"\x0161" L"achy.com",
    203    {true,  false, false, false, false,
    204     false, false, false, false, false,
    205     false, false, false, false, false,
    206     false, false, false, false, false,
    207     false}},
    208   // TODO(jungshik) : Add examples with Cyrillic letters
    209   // only used in some languages written in Cyrillic.
    210   // Eutopia (Greek)
    211   {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
    212    {true,  false, false, false, false,
    213     false, false, false, true,  false,
    214     false, false, false, false, false,
    215     false, true,  false, false, false,
    216     false}},
    217   // Eutopia + 123 (Greek)
    218   {"xn---123-pldm0haj2bk.gr",
    219    L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
    220    {true,  false, false, false, false,
    221     false, false, false, true,  false,
    222     false, false, false, false, false,
    223     false, true,  false, false, false,
    224     false}},
    225   // Cyrillic (Russian)
    226   {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
    227    {true,  false, false, false, false,
    228     false, false, true,  false, false,
    229     false, false, false, false, false,
    230     false, false, false, false, true,
    231     true}},
    232   // Cyrillic + 123 (Russian)
    233   {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
    234    {true,  false, false, false, false,
    235     false, false, true,  false, false,
    236     false, false, false, false, false,
    237     false, false, false, false, true,
    238     true}},
    239   // Arabic
    240   {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
    241    {true,  false, false, false, false,
    242     false, true,  false, false, false,
    243     false, false, false, false, false,
    244     false, false, false, false, false,
    245     false}},
    246   // Hebrew
    247   {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
    248    {true,  false, false, false, false,
    249     true,  false, false, false, false,
    250     false, false, false, false, false,
    251     false, false, false, false, true,
    252     false}},
    253   // Thai
    254   {"xn--12c2cc4ag3b4ccu.th",
    255    L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
    256    {true,  false, false, false, false,
    257     false, false, false, false, false,
    258     false, false, false, true,  false,
    259     false, false, false, false, false,
    260     false}},
    261   // Devanagari (Hindi)
    262   {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
    263    {true,  false, false, false, false,
    264     false, false, false, false, false,
    265     false, false, false, false, true,
    266     false, false, false, false, false,
    267     false}},
    268   // Invalid IDN
    269   {"xn--hello?world.com", NULL,
    270    {false, false, false, false, false,
    271     false, false, false, false, false,
    272     false, false, false, false, false,
    273     false, false, false, false, false,
    274     false}},
    275   // Unsafe IDNs
    276   // "payp<alpha>l.com"
    277   {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
    278    {false, false, false, false, false,
    279     false, false, false, false, false,
    280     false, false, false, false, false,
    281     false, false, false, false, false,
    282     false}},
    283   // google.gr with Greek omicron and epsilon
    284   {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
    285    {false, false, false, false, false,
    286     false, false, false, false, false,
    287     false, false, false, false, false,
    288     false, false, false, false, false,
    289     false}},
    290   // google.ru with Cyrillic o
    291   {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
    292    {false, false, false, false, false,
    293     false, false, false, false, false,
    294     false, false, false, false, false,
    295     false, false, false, false, false,
    296     false}},
    297   // h<e with acute>llo<China in Han>.cn
    298   {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
    299    {false, false, false, false, false,
    300     false, false, false, false, false,
    301     false, false, false, false, false,
    302     false, false, false, false, false,
    303     false}},
    304   // <Greek rho><Cyrillic a><Cyrillic u>.ru
    305   {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
    306    {false, false, false, false, false,
    307     false, false, false, false, false,
    308     false, false, false, false, false,
    309     false, false, false, false, false,
    310     false}},
    311   // One that's really long that will force a buffer realloc
    312   {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    313        "aaaaaaa",
    314    L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    315        L"aaaaaaaa",
    316    {true,  true,  true,  true,  true,
    317     true,  true,  true,  true,  true,
    318     true,  true,  true,  true,  true,
    319     true,  true,  true,  true,  true,
    320     true}},
    321   // Test cases for characters we blacklisted although allowed in IDN.
    322   // Embedded spaces will be turned to %20 in the display.
    323   // TODO(jungshik): We need to have more cases. This is a typical
    324   // data-driven trap. The following test cases need to be separated
    325   // and tested only for a couple of languages.
    326   {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
    327     {false, false, false, false, false,
    328      false, false, false, false, false,
    329      false, false, false, false, false,
    330      false, false, false, false, false,
    331      false}},
    332   {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
    333     {false, false, false, false, false,
    334      false, false, false, false, false,
    335      false, false, false, false, false,
    336      false, false, false, false, false,
    337   }},
    338   {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
    339     {false, false, false, false, false,
    340      false, false, false, false, false,
    341      false, false, false, false, false,
    342      false, false, false, false, false,
    343   }},
    344   {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
    345     {false, false, false, false, false,
    346      false, false, false, false, false,
    347      false, false, false, false, false,
    348      false, false, false, false, false,
    349   }},
    350   {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
    351     {false, false, false, false, false,
    352      false, false, false, false, false,
    353      false, false, false, false, false,
    354      false, false, false, false, false,
    355   }},
    356 #if 0
    357   // These two cases are special. We need a separate test.
    358   // U+3000 and U+3002 are normalized to ASCII space and dot.
    359   {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
    360     {false, false, true,  false, false,
    361      false, false, false, false, false,
    362      false, false, false, false, false,
    363      false, false, true,  false, false,
    364      true}},
    365   {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
    366     {false, false, true,  false, false,
    367      false, false, false, false, false,
    368      false, false, false, false, false,
    369      false, false, true,  false, false,
    370      true}},
    371 #endif
    372 };
    373 
    374 struct AdjustOffsetCase {  // An input offset and the output offset expected for it.
          // NOTE(review): the tests that consume this are outside this chunk;
          // presumably offsets into a string before/after a transformation.
    375   size_t input_offset;
    376   size_t output_offset;
    377 };
    378 
    379 struct CompliantHostCase {  // A host/TLD pair and the boolean result expected for it.
          // NOTE(review): the consuming test is outside this chunk -- confirm
          // which function |expected_output| is compared against.
    380   const char* host;
    381   const char* desired_tld;
    382   bool expected_output;
    383 };
    384 
    385 struct SuggestedFilenameCase {  // Inputs and expected result for a suggested-filename case.
          // NOTE(review): the consuming test is outside this chunk; field
          // meanings below are read off the names only -- confirm.
    386   const char* url;
    387   const char* content_disp_header;
    388   const char* referrer_charset;
    389   const wchar_t* default_filename;
    390   const wchar_t* expected_filename;
    391 };
    392 
    393 struct UrlTestData {  // One URL-formatting case: input, options, expected output.
          // NOTE(review): the consuming test is outside this chunk; presumably
          // a FormatUrl test, given the FormatUrlTypes member -- confirm.
    394   const char* description;
    395   const char* input;
    396   const char* languages;
    397   FormatUrlTypes format_types;
    398   UnescapeRule::Type escape_rules;
    399   const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
    400   size_t prefix_len;
    401 };
    402 
    403 // Returns an addrinfo for the given 32-bit address (IPv4.)
    404 // The result lives in static storage, so don't delete it.
    405 // |bytes| should be an array of length 4.
    406 const struct addrinfo* GetIPv4Address(const uint8* bytes, int port) {
    407   static struct addrinfo static_ai;
    408   static struct sockaddr_in static_addr4;
    409 
    410   struct addrinfo* ai = &static_ai;
    411   ai->ai_socktype = SOCK_STREAM;
    412   memset(ai, 0, sizeof(static_ai));
    413 
    414   ai->ai_family = AF_INET;
    415   ai->ai_addrlen = sizeof(static_addr4);
    416 
    417   struct sockaddr_in* addr4 = &static_addr4;
    418   memset(addr4, 0, sizeof(static_addr4));
    419   addr4->sin_port = htons(port);
    420   addr4->sin_family = ai->ai_family;
    421   memcpy(&addr4->sin_addr, bytes, 4);
    422 
    423   ai->ai_addr = (sockaddr*)addr4;
    424   return ai;
    425 }
    426 
    427 // Returns a addrinfo for the given 128-bit address (IPv6.)
    428 // The result lives in static storage, so don't delete it.
    429 // |bytes| should be an array of length 16.
    430 const struct addrinfo* GetIPv6Address(const uint8* bytes, int port) {
    431   static struct addrinfo static_ai;
    432   static struct sockaddr_in6 static_addr6;
    433 
    434   struct addrinfo* ai = &static_ai;
    435   ai->ai_socktype = SOCK_STREAM;
    436   memset(ai, 0, sizeof(static_ai));
    437 
    438   ai->ai_family = AF_INET6;
    439   ai->ai_addrlen = sizeof(static_addr6);
    440 
    441   struct sockaddr_in6* addr6 = &static_addr6;
    442   memset(addr6, 0, sizeof(static_addr6));
    443   addr6->sin6_port = htons(port);
    444   addr6->sin6_family = ai->ai_family;
    445   memcpy(&addr6->sin6_addr, bytes, 16);
    446 
    447   ai->ai_addr = (sockaddr*)addr6;
    448   return ai;
    449 }
    450 
    451 // A helper for IDN*{Fast,Slow}.
    452 // Append "::<language list>" to |expected| and |actual| to make it
    453 // easy to tell which sub-case fails without debugging.
    454 void AppendLanguagesToOutputs(const wchar_t* languages,
    455                               std::wstring* expected,
    456                               std::wstring* actual) {
    457   expected->append(L"::");
    458   expected->append(languages);
    459   actual->append(L"::");
    460   actual->append(languages);
    461 }
    462 
    463 // Helper to strignize an IP number (used to define expectations).
    464 std::string DumpIPNumber(const IPAddressNumber& v) {
    465   std::string out;
    466   for (size_t i = 0; i < v.size(); ++i) {
    467     if (i != 0)
    468       out.append(",");
    469     out.append(base::IntToString(static_cast<int>(v[i])));
    470   }
    471   return out;
    472 }
    473 
    474 }  // anonymous namespace
    475 
    476 TEST(NetUtilTest, FileURLConversion) {
    477   // a list of test file names and the corresponding URLs
    478   const FileCase round_trip_cases[] = {
    479 #if defined(OS_WIN)
    480     {L"C:\\foo\\bar.txt", "file:///C:/foo/bar.txt"},
    481     {L"\\\\some computer\\foo\\bar.txt",
    482      "file://some%20computer/foo/bar.txt"}, // UNC
    483     {L"D:\\Name;with%some symbols*#",
    484      "file:///D:/Name%3Bwith%25some%20symbols*%23"},
    485     // issue 14153: To be tested with the OS default codepage other than 1252.
    486     {L"D:\\latin1\\caf\x00E9\x00DD.txt",
    487      "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
    488     {L"D:\\otherlatin\\caf\x0119.txt",
    489      "file:///D:/otherlatin/caf%C4%99.txt"},
    490     {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
    491      "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
    492     {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
    493      "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
    494          "%E9%A1%B5.doc"},
    495     {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt",  // Math alphabet "AB"
    496      "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
    497 #elif defined(OS_POSIX)
    498     {L"/foo/bar.txt", "file:///foo/bar.txt"},
    499     {L"/foo/BAR.txt", "file:///foo/BAR.txt"},
    500     {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
    501     {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
    502     {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
    503     {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
    504     {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
    505     {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
    506     {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
    507      "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
    508          "%91%E9%A1%B5.doc"},
    509     {L"/plane1/\x1D400\x1D401.txt",  // Math alphabet "AB"
    510      "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
    511 #endif
    512   };
    513 
    514   // First, we'll test that we can round-trip all of the above cases of URLs
    515   FilePath output;
    516   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(round_trip_cases); i++) {
    517     // convert to the file URL
    518     GURL file_url(FilePathToFileURL(
    519                       file_util::WStringAsFilePath(round_trip_cases[i].file)));
    520     EXPECT_EQ(round_trip_cases[i].url, file_url.spec());
    521 
    522     // Back to the filename.
    523     EXPECT_TRUE(FileURLToFilePath(file_url, &output));
    524     EXPECT_EQ(round_trip_cases[i].file, file_util::FilePathAsWString(output));
    525   }
    526 
    527   // Test that various file: URLs get decoded into the correct file type
    528   FileCase url_cases[] = {
    529 #if defined(OS_WIN)
    530     {L"C:\\foo\\bar.txt", "file:c|/foo\\bar.txt"},
    531     {L"C:\\foo\\bar.txt", "file:/c:/foo/bar.txt"},
    532     {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
    533     {L"C:\\foo\\bar.txt", "file:///c:/foo/bar.txt"},
    534     {L"\\\\foo\\bar.txt", "file:////foo\\bar.txt"},
    535     {L"\\\\foo\\bar.txt", "file:/foo/bar.txt"},
    536     {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
    537     {L"C:\\foo\\bar.txt", "file:\\\\\\c:/foo/bar.txt"},
    538 #elif defined(OS_POSIX)
    539     {L"/c:/foo/bar.txt", "file:/c:/foo/bar.txt"},
    540     {L"/c:/foo/bar.txt", "file:///c:/foo/bar.txt"},
    541     {L"/foo/bar.txt", "file:/foo/bar.txt"},
    542     {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
    543     {L"/foo/bar.txt", "file:foo/bar.txt"},
    544     {L"/bar.txt", "file://foo/bar.txt"},
    545     {L"/foo/bar.txt", "file:///foo/bar.txt"},
    546     {L"/foo/bar.txt", "file:////foo/bar.txt"},
    547     {L"/foo/bar.txt", "file:////foo//bar.txt"},
    548     {L"/foo/bar.txt", "file:////foo///bar.txt"},
    549     {L"/foo/bar.txt", "file:////foo////bar.txt"},
    550     {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
    551     {L"/c:/foo/bar.txt", "file:c:/foo/bar.txt"},
    552     // We get these wrong because GURL turns back slashes into forward
    553     // slashes.
    554     //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
    555     //{L"/c|/foo%5Cbar.txt", "file:c|/foo\\bar.txt"},
    556     //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
    557     //{L"/foo%5Cbar.txt", "file:////foo\\bar.txt"},
    558     //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
    559 #endif
    560   };
    561   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(url_cases); i++) {
    562     FileURLToFilePath(GURL(url_cases[i].url), &output);
    563     EXPECT_EQ(url_cases[i].file, file_util::FilePathAsWString(output));
    564   }
    565 
    566   // Unfortunately, UTF8ToWide discards invalid UTF8 input.
    567 #ifdef BUG_878908_IS_FIXED
    568   // Test that no conversion happens if the UTF-8 input is invalid, and that
    569   // the input is preserved in UTF-8
    570   const char invalid_utf8[] = "file:///d:/Blah/\xff.doc";
    571   const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc";
    572   EXPECT_TRUE(FileURLToFilePath(
    573       GURL(std::string(invalid_utf8)), &output));
    574   EXPECT_EQ(std::wstring(invalid_wide), output);
    575 #endif
    576 
    577   // Test that if a file URL is malformed, we get a failure
    578   EXPECT_FALSE(FileURLToFilePath(GURL("filefoobar"), &output));
    579 }
    580 
    581 TEST(NetUtilTest, GetIdentityFromURL) {
    582   struct {
    583     const char* input_url;
    584     const char* expected_username;
    585     const char* expected_password;
    586   } tests[] = {
    587     {
    588       "http://username:password@google.com",
    589       "username",
    590       "password",
    591     },
    592     { // Test for http://crbug.com/19200
    593       "http://username:p@ssword@google.com",
    594       "username",
    595       "p@ssword",
    596     },
    597     { // Special URL characters should be unescaped.
    598       "http://username:p%3fa%26s%2fs%23@google.com",
    599       "username",
    600       "p?a&s/s#",
    601     },
    602     { // Username contains %20.
    603       "http://use rname:password (at) google.com",
    604       "use rname",
    605       "password",
    606     },
    607     { // Keep %00 as is.
    608       "http://use%00rname:password@google.com",
    609       "use%00rname",
    610       "password",
    611     },
    612     { // Use a '+' in the username.
    613       "http://use+rname:password@google.com",
    614       "use+rname",
    615       "password",
    616     },
    617     { // Use a '&' in the password.
    618       "http://username:p&ssword@google.com",
    619       "username",
    620       "p&ssword",
    621     },
    622   };
    623   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    624     SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", i,
    625                                     tests[i].input_url));
    626     GURL url(tests[i].input_url);
    627 
    628     string16 username, password;
    629     GetIdentityFromURL(url, &username, &password);
    630 
    631     EXPECT_EQ(ASCIIToUTF16(tests[i].expected_username), username);
    632     EXPECT_EQ(ASCIIToUTF16(tests[i].expected_password), password);
    633   }
    634 }
    635 
    636 // Try extracting a username which was encoded with UTF8.
    637 TEST(NetUtilTest, GetIdentityFromURL_UTF8) {
    638   GURL url(WideToUTF16(L"http://foo:\x4f60\x597d@blah.com"));
    639 
    640   EXPECT_EQ("foo", url.username());
    641   EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
    642 
    643   // Extract the unescaped identity.
    644   string16 username, password;
    645   GetIdentityFromURL(url, &username, &password);
    646 
    647   // Verify that it was decoded as UTF8.
    648   EXPECT_EQ(ASCIIToUTF16("foo"), username);
    649   EXPECT_EQ(WideToUTF16(L"\x4f60\x597d"), password);
    650 }
    651 
    652 // Just a bunch of fake headers.
        // They are the input of the GetSpecificHeader and GetHeaderParamValue
        // tests. Each header ends in '\n' except the last one. X-Malformed has
        // an unbalanced quote and X-Malformed2 an empty parameter value.
    653 const wchar_t* google_headers =
    654     L"HTTP/1.1 200 OK\n"
    655     L"Content-TYPE: text/html; charset=utf-8\n"
    656     L"Content-disposition: attachment; filename=\"download.pdf\"\n"
    657     L"Content-Length: 378557\n"
    658     L"X-Google-Google1: 314159265\n"
    659     L"X-Google-Google2: aaaa2:7783,bbb21:9441\n"
    660     L"X-Google-Google4: home\n"
    661     L"Transfer-Encoding: chunked\n"
    662     L"Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n"
    663     L"Set-Cookie: HEHE_HELP=owned:0;Path=/\n"
    664     L"Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:gmail_yj=beefbeef000beefbee"
    665         L"fbee:gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n"
    666     L"X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n"
    667     L"Server: GFE/1.3\n"
    668     L"Transfer-Encoding: chunked\n"
    669     L"Date: Mon, 13 Nov 2006 21:38:09 GMT\n"
    670     L"Expires: Tue, 14 Nov 2006 19:23:58 GMT\n"
    671     L"X-Malformed: bla; arg=test\"\n"
    672     L"X-Malformed2: bla; arg=\n"
    673     L"X-Test: bla; arg1=val1; arg2=val2";
    674 
    675 TEST(NetUtilTest, GetSpecificHeader) {
    676   const HeaderCase tests[] = {
    677     {L"content-type", L"text/html; charset=utf-8"},
    678     {L"CONTENT-LENGTH", L"378557"},
    679     {L"Date", L"Mon, 13 Nov 2006 21:38:09 GMT"},
    680     {L"Bad-Header", L""},
    681     {L"", L""},
    682   };
    683 
    684   // Test first with google_headers.
    685   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    686     std::wstring result = GetSpecificHeader(google_headers,
    687                                                  tests[i].header_name);
    688     EXPECT_EQ(result, tests[i].expected);
    689   }
    690 
    691   // Test again with empty headers.
    692   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    693     std::wstring result = GetSpecificHeader(L"", tests[i].header_name);
    694     EXPECT_EQ(result, std::wstring());
    695   }
    696 }
    697 
    698 TEST(NetUtilTest, GetHeaderParamValue) {
    699   const HeaderParamCase tests[] = {
    700     {L"Content-type", L"charset", L"utf-8"},
    701     {L"content-disposition", L"filename", L"download.pdf"},
    702     {L"Content-Type", L"badparam", L""},
    703     {L"X-Malformed", L"arg", L"test\""},
    704     {L"X-Malformed2", L"arg", L""},
    705     {L"X-Test", L"arg1", L"val1"},
    706     {L"X-Test", L"arg2", L"val2"},
    707     {L"Bad-Header", L"badparam", L""},
    708     {L"Bad-Header", L"", L""},
    709     {L"", L"badparam", L""},
    710     {L"", L"", L""},
    711   };
    712   // TODO(mpcomplete): add tests for other formats of headers.
    713 
    714   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    715     std::wstring header_value =
    716         GetSpecificHeader(google_headers, tests[i].header_name);
    717     std::wstring result =
    718         GetHeaderParamValue(header_value, tests[i].param_name,
    719                             QuoteRule::REMOVE_OUTER_QUOTES);
    720     EXPECT_EQ(result, tests[i].expected);
    721   }
    722 
    723   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    724     std::wstring header_value =
    725         GetSpecificHeader(L"", tests[i].header_name);
    726     std::wstring result =
    727         GetHeaderParamValue(header_value, tests[i].param_name,
    728                             QuoteRule::REMOVE_OUTER_QUOTES);
    729     EXPECT_EQ(result, std::wstring());
    730   }
    731 }
    732 
    733 TEST(NetUtilTest, GetHeaderParamValueQuotes) {
    734   struct {
    735     const char* header;
    736     const char* expected_with_quotes;
    737     const char* expected_without_quotes;
    738   } tests[] = {
    739     {"filename=foo", "foo", "foo"},
    740     {"filename=\"foo\"", "\"foo\"", "foo"},
    741     {"filename=foo\"", "foo\"", "foo\""},
    742     {"filename=fo\"o", "fo\"o", "fo\"o"},
    743   };
    744 
    745   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    746     std::string actual_with_quotes =
    747         GetHeaderParamValue(tests[i].header, "filename",
    748                             QuoteRule::KEEP_OUTER_QUOTES);
    749     std::string actual_without_quotes =
    750         GetHeaderParamValue(tests[i].header, "filename",
    751                             QuoteRule::REMOVE_OUTER_QUOTES);
    752     EXPECT_EQ(tests[i].expected_with_quotes, actual_with_quotes)
    753         << "Failed while processing: " << tests[i].header;
    754     EXPECT_EQ(tests[i].expected_without_quotes, actual_without_quotes)
    755         << "Failed while processing: " << tests[i].header;
    756   }
    757 }
    758 
    759 TEST(NetUtilTest, GetFileNameFromCD) {
    760   const FileNameCDCase tests[] = {
    761     // Test various forms of C-D header fields emitted by web servers.
    762     {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
    763     {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
    764     {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"},
    765     {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"},
    766     {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
    767     {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"},
    768     {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"},
    769     {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"},
    770     {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"},
    771     {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"},
    772     {"content-disposition: filename \t\t  =abcde.pdf", "", L"abcde.pdf"},
    773     {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"},
    774     {"content-disposition: inline; filename=\"abc%20de.pdf\"", "",
    775      L"abc de.pdf"},
    776     // Unbalanced quotation mark
    777     {"content-disposition: filename=\"abcdef.pdf", "", L"abcdef.pdf"},
    778     // Whitespaces are converted to a space.
    779     {"content-disposition: inline; filename=\"abc  \t\nde.pdf\"", "",
    780      L"abc    de.pdf"},
    781     // %-escaped UTF-8
    782     {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
    783      "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
    784     {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
    785      "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
    786     {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n"
    787      "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220  \xc608\xc220.jpg"},
    788     // RFC 2047 with various charsets and Q/B encodings
    789     {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
    790      "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
    791     {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
    792      "", L"\xc608\xc220 3.png"},
    793     {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
    794      "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
    795     {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0"
    796      "_3=2Epng?=", "", L"\U00010330 3.png"},
    797     {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
    798      "", L"caf\x00e9 .png"},
    799     // Space after an encoded word should be removed.
    800     {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
    801      "", L"caf\x00e9 .png"},
    802     // Two encoded words with different charsets (not very likely to be emitted
    803     // by web servers in the wild). Spaces between them are removed.
    804     {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?="
    805      " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
    806      L"\xc608\xc220 3\xc608\xc220.png"},
    807     {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?="
    808      "  =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
    809     // Non-ASCII string is passed through and treated as UTF-8 as long as
    810     // it's valid as UTF-8 and regardless of |referrer_charset|.
    811     {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
    812      "iso-8859-1", L"caf\x00e9.png"},
    813     {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
    814      "", L"caf\x00e9.png"},
    815     // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
    816     {"Content-Disposition: attachment; filename=caf\xe5.png",
    817      "windows-1253", L"caf\x03b5.png"},
    818 #if 0
    819     // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
    820     // TODO(jungshik): We need to set the OS default codepage
    821     // to a specific value before testing. On Windows, we can use
    822     // SetThreadLocale().
    823     {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png",
    824      "", L"\xac00\xac01.png"},
    825 #endif
    826     // Failure cases
    827     // Invalid hex-digit "G"
    828     {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "",
    829      L""},
    830     // Incomplete RFC 2047 encoded-word (missing '='' at the end)
    831     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""},
    832     // Extra character at the end of an encoded word
    833     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==",
    834      "", L""},
    835     // Extra token at the end of an encoded word
    836     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?",
    837      "", L""},
    838     {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=",
    839      "",  L""},
    840     // Incomplete hex-escaped chars
    841     {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=",
    842      "", L""},
    843     {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""},
    844     // %-escaped non-UTF-8 encoding is an "error"
    845     {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""},
    846     // Two RFC 2047 encoded words in a row without a space is an error.
    847     {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?="
    848      "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
    849 
    850     // RFC 5987 tests with Filename*  : see http://tools.ietf.org/html/rfc5987
    851     {"Content-Disposition: attachment; filename*=foo.html", "", L""},
    852     {"Content-Disposition: attachment; filename*=foo'.html", "", L""},
    853     {"Content-Disposition: attachment; filename*=''foo'.html", "", L""},
    854     {"Content-Disposition: attachment; filename*=''foo.html'", "", L""},
    855     {"Content-Disposition: attachment; filename*=''f\"oo\".html'", "", L""},
    856     {"Content-Disposition: attachment; filename*=bogus_charset''foo.html'",
    857      "", L""},
    858     {"Content-Disposition: attachment; filename*='en'foo.html'", "", L""},
    859     {"Content-Disposition: attachment; filename*=iso-8859-1'en'foo.html", "",
    860       L"foo.html"},
    861     {"Content-Disposition: attachment; filename*=utf-8'en'foo.html", "",
    862       L"foo.html"},
    863     // charset cannot be omitted.
    864     {"Content-Disposition: attachment; filename*='es'f\xfa.html'", "", L""},
    865     // Non-ASCII bytes are not allowed.
    866     {"Content-Disposition: attachment; filename*=iso-8859-1'es'f\xfa.html", "",
    867       L""},
    868     {"Content-Disposition: attachment; filename*=utf-8'es'f\xce\xba.html", "",
    869       L""},
    870     // TODO(jshin): Space should be %-encoded, but currently, we allow
    871     // spaces.
    872     {"Content-Disposition: inline; filename*=iso88591''cafe foo.png", "",
    873       L"cafe foo.png"},
    874 
    875     // Filename* tests converted from Q-encoded tests above.
    876     {"Content-Disposition: attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng",
    877      "", L"\x82b8\x8853" L"3.png"},
    878     {"Content-Disposition: attachment; filename*=utf-8''"
    879       "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"},
    880     {"Content-Disposition: attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "",
    881       L"\U00010330 3.png"},
    882     {"Content-Disposition: inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "",
    883      L"\xc608\xc220.png"},
    884     {"Content-Disposition: attachment; filename*=windows-1252''caf%E9.png", "",
    885       L"caf\x00e9.png"},
    886 
    887     // http://greenbytes.de/tech/tc2231/ filename* test cases.
    888     // attwithisofn2231iso
    889     {"Content-Disposition: attachment; filename*=iso-8859-1''foo-%E4.html", "",
    890       L"foo-\xe4.html"},
    891     // attwithfn2231utf8
    892     {"Content-Disposition: attachment; filename*="
    893       "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"},
    894     // attwithfn2231noc : no encoding specified but UTF-8 is used.
    895     {"Content-Disposition: attachment; filename*=''foo-%c3%a4-%e2%82%ac.html",
    896       "", L""},
    897     // attwithfn2231utf8comp
    898     {"Content-Disposition: attachment; filename*=UTF-8''foo-a%cc%88.html", "",
    899       L"foo-\xe4.html"},
    900 #ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER
    901     // This does not work because we treat ISO-8859-1 synonymous with
    902     // Windows-1252 per HTML5. For HTTP, in theory, we're not
    903     // supposed to.
    904     // attwithfn2231utf8-bad
    905     {"Content-Disposition: attachment; filename*="
    906       "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""},
    907 #endif
    908     // attwithfn2231ws1
    909     {"Content-Disposition: attachment; filename *=UTF-8''foo-%c3%a4.html", "",
    910       L""},
    911     // attwithfn2231ws2
    912     {"Content-Disposition: attachment; filename*= UTF-8''foo-%c3%a4.html", "",
    913       L"foo-\xe4.html"},
    914     // attwithfn2231ws3
    915     {"Content-Disposition: attachment; filename* =UTF-8''foo-%c3%a4.html", "",
    916       L"foo-\xe4.html"},
    917     // attwithfn2231quot
    918     {"Content-Disposition: attachment; filename*=\"UTF-8''foo-%c3%a4.html\"",
    919       "", L""},
    920     // attfnboth
    921     {"Content-Disposition: attachment; filename=\"foo-ae.html\"; "
    922       "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"},
    923     // attfnboth2
    924     {"Content-Disposition: attachment; filename*=UTF-8''foo-%c3%a4.html; "
    925       "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"},
    926     // attnewandfn
    927     {"Content-Disposition: attachment; foobar=x; filename=\"foo.html\"", "",
    928       L"foo.html"},
    929   };
    930   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
    931     EXPECT_EQ(tests[i].expected,
    932               UTF8ToWide(GetFileNameFromCD(tests[i].header_field,
    933                                            tests[i].referrer_charset)))
    934         << "Failed on input: " << tests[i].header_field;
    935   }
    936 }
    937 
    938 TEST(NetUtilTest, IDNToUnicodeFast) {
    939   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
    940     for (size_t j = 0; j < arraysize(kLanguages); j++) {
    941       // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
    942       if (j == 3 || j == 17 || j == 18)
    943         continue;
    944       std::wstring output(IDNToUnicode(idn_cases[i].input,
    945           strlen(idn_cases[i].input), kLanguages[j], NULL));
    946       std::wstring expected(idn_cases[i].unicode_allowed[j] ?
    947           idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
    948       AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
    949       EXPECT_EQ(expected, output);
    950     }
    951   }
    952 }
    953 
    954 TEST(NetUtilTest, IDNToUnicodeSlow) {
    955   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
    956     for (size_t j = 0; j < arraysize(kLanguages); j++) {
    957       // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
    958       if (!(j == 3 || j == 17 || j == 18))
    959         continue;
    960       std::wstring output(IDNToUnicode(idn_cases[i].input,
    961           strlen(idn_cases[i].input), kLanguages[j], NULL));
    962       std::wstring expected(idn_cases[i].unicode_allowed[j] ?
    963           idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
    964       AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
    965       EXPECT_EQ(expected, output);
    966     }
    967   }
    968 }
    969 
    970 TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
    971   const AdjustOffsetCase adjust_cases[] = {
    972     {0, 0},
    973     {2, 2},
    974     {4, 4},
    975     {5, 5},
    976     {6, string16::npos},
    977     {16, string16::npos},
    978     {17, 7},
    979     {18, 8},
    980     {19, string16::npos},
    981     {25, string16::npos},
    982     {34, 12},
    983     {35, 13},
    984     {38, 16},
    985     {39, string16::npos},
    986     {string16::npos, string16::npos},
    987   };
    988   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) {
    989     size_t offset = adjust_cases[i].input_offset;
    990     // "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test"
    991     IDNToUnicode("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, L"zh-CN",
    992                       &offset);
    993     EXPECT_EQ(adjust_cases[i].output_offset, offset);
    994   }
    995 
    996   std::vector<size_t> offsets;
    997   for (size_t i = 0; i < 40; ++i)
    998     offsets.push_back(i);
    999   IDNToUnicodeWithOffsets("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39,
   1000                           L"zh-CN", &offsets);
   1001   size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, kNpos, kNpos, kNpos, kNpos,
   1002                        kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, kNpos,
   1003                        kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
   1004                        kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15,
   1005                        16, kNpos};
   1006   ASSERT_EQ(40U, arraysize(expected));
   1007   for (size_t i = 0; i < 40; ++i)
   1008     EXPECT_EQ(expected[i], offsets[i]);
   1009 }
   1010 
   1011 TEST(NetUtilTest, CompliantHost) {
   1012   const CompliantHostCase compliant_host_cases[] = {
   1013     {"", "", false},
   1014     {"a", "", true},
   1015     {"-", "", false},
   1016     {".", "", false},
   1017     {"9", "", false},
   1018     {"9", "a", true},
   1019     {"9a", "", false},
   1020     {"9a", "a", true},
   1021     {"a.", "", true},
   1022     {"a.a", "", true},
   1023     {"9.a", "", true},
   1024     {"a.9", "", false},
   1025     {"_9a", "", false},
   1026     {"a.a9", "", true},
   1027     {"a.9a", "", false},
   1028     {"a+9a", "", false},
   1029     {"1-.a-b", "", false},
   1030     {"1-2.a_b", "", true},
   1031     {"a.b.c.d.e", "", true},
   1032     {"1.2.3.4.e", "", true},
   1033     {"a.b.c.d.5", "", false},
   1034     {"1.2.3.4.e.", "", true},
   1035     {"a.b.c.d.5.", "", false},
   1036   };
   1037 
   1038   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) {
   1039     EXPECT_EQ(compliant_host_cases[i].expected_output,
   1040         IsCanonicalizedHostCompliant(compliant_host_cases[i].host,
   1041                                           compliant_host_cases[i].desired_tld));
   1042   }
   1043 }
   1044 
   1045 TEST(NetUtilTest, StripWWW) {
   1046   EXPECT_EQ(string16(), StripWWW(string16()));
   1047   EXPECT_EQ(string16(), StripWWW(ASCIIToUTF16("www.")));
   1048   EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
   1049   EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
   1050 }
   1051 
   1052 TEST(NetUtilTest, GetSuggestedFilename) {
   1053   const SuggestedFilenameCase test_cases[] = {
   1054     {"http://www.google.com/",
   1055      "Content-disposition: attachment; filename=test.html",
   1056      "",
   1057      L"",
   1058      L"test.html"},
   1059     {"http://www.google.com/",
   1060      "Content-disposition: attachment; filename=\"test.html\"",
   1061      "",
   1062      L"",
   1063      L"test.html"},
   1064     {"http://www.google.com/path/test.html",
   1065      "Content-disposition: attachment",
   1066      "",
   1067      L"",
   1068      L"test.html"},
   1069     {"http://www.google.com/path/test.html",
   1070      "Content-disposition: attachment;",
   1071      "",
   1072      L"",
   1073      L"test.html"},
   1074     {"http://www.google.com/",
   1075      "",
   1076      "",
   1077      L"",
   1078      L"www.google.com"},
   1079     {"http://www.google.com/test.html",
   1080      "",
   1081      "",
   1082      L"",
   1083      L"test.html"},
   1084     // Now that we use googleurl's ExtractFileName, this case falls back
   1085     // to the hostname. If this behavior is not desirable, we'd better
   1086     // change ExtractFileName (in url_parse).
   1087     {"http://www.google.com/path/",
   1088      "",
   1089      "",
   1090      L"",
   1091      L"www.google.com"},
   1092     {"http://www.google.com/path",
   1093      "",
   1094      "",
   1095      L"",
   1096      L"path"},
   1097     {"file:///",
   1098      "",
   1099      "",
   1100      L"",
   1101      L"download"},
   1102     {"non-standard-scheme:",
   1103      "",
   1104      "",
   1105      L"",
   1106      L"download"},
   1107     {"http://www.google.com/",
   1108      "Content-disposition: attachment; filename =\"test.html\"",
   1109      "",
   1110      L"download",
   1111      L"test.html"},
   1112     {"http://www.google.com/",
   1113      "",
   1114      "",
   1115      L"download",
   1116      L"download"},
   1117     {"http://www.google.com/",
   1118      "Content-disposition: attachment; filename=\"../test.html\"",
   1119      "",
   1120      L"",
   1121      L"_test.html"},
   1122     {"http://www.google.com/",
   1123      "Content-disposition: attachment; filename=\"..\\test.html\"",
   1124      "",
   1125      L"",
   1126      L"_test.html"},
   1127     {"http://www.google.com/",
   1128      "Content-disposition: attachment; filename=\"..\"",
   1129      "",
   1130      L"download",
   1131      L"download"},
   1132     {"http://www.google.com/test.html",
   1133      "Content-disposition: attachment; filename=\"..\"",
   1134      "",
   1135      L"download",
   1136      L"test.html"},
   1137     // Below is a small subset of cases taken from GetFileNameFromCD test above.
   1138     {"http://www.google.com/",
   1139      "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
   1140      "%EC%98%88%EC%88%A0.jpg\"",
   1141      "",
   1142      L"",
   1143      L"\uc608\uc220 \uc608\uc220.jpg"},
   1144     {"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg",
   1145      "",
   1146      "",
   1147      L"download",
   1148      L"\uc608\uc220 \uc608\uc220.jpg"},
   1149     {"http://www.google.com/",
   1150      "Content-disposition: attachment;",
   1151      "",
   1152      L"\uB2E4\uC6B4\uB85C\uB4DC",
   1153      L"\uB2E4\uC6B4\uB85C\uB4DC"},
   1154     {"http://www.google.com/",
   1155      "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
   1156      "D13=2Epng?=\"",
   1157      "",
   1158      L"download",
   1159      L"\u82b8\u88533.png"},
   1160     {"http://www.example.com/images?id=3",
   1161      "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
   1162      "iso-8859-1",
   1163      L"",
   1164      L"caf\u00e9.png"},
   1165     {"http://www.example.com/images?id=3",
   1166      "Content-Disposition: attachment; filename=caf\xe5.png",
   1167      "windows-1253",
   1168      L"",
   1169      L"caf\u03b5.png"},
   1170     {"http://www.example.com/file?id=3",
   1171      "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip",
   1172      "GBK",
   1173      L"",
   1174      L"\u4e0b\u8f7d.zip"},
   1175     // Invalid C-D header. Extracts filename from url.
   1176     {"http://www.google.com/test.html",
   1177      "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
   1178      "",
   1179      L"",
   1180      L"test.html"},
   1181     // about: and data: URLs
   1182     {"about:chrome",
   1183      "",
   1184      "",
   1185      L"",
   1186      L"download"},
   1187     {"data:,looks/like/a.path",
   1188      "",
   1189      "",
   1190      L"",
   1191      L"download"},
   1192     {"data:text/plain;base64,VG8gYmUgb3Igbm90IHRvIGJlLg=",
   1193      "",
   1194      "",
   1195      L"",
   1196      L"download"},
   1197     {"data:,looks/like/a.path",
   1198      "",
   1199      "",
   1200      L"default_filename_is_given",
   1201      L"default_filename_is_given"},
   1202     {"data:,looks/like/a.path",
   1203      "",
   1204      "",
   1205      L"\u65e5\u672c\u8a9e",  // Japanese Kanji.
   1206      L"\u65e5\u672c\u8a9e"},
   1207     // Dotfiles. Ensures preceeding period(s) stripped.
   1208     {"http://www.google.com/.test.html",
   1209     "",
   1210     "",
   1211     L"",
   1212     L"test.html"},
   1213     {"http://www.google.com/.test",
   1214     "",
   1215     "",
   1216     L"",
   1217     L"test"},
   1218     {"http://www.google.com/..test",
   1219     "",
   1220     "",
   1221     L"",
   1222     L"test"},
   1223     // The filename encoding is specified by the referrer charset.
   1224     {"http://example.com/V%FDvojov%E1%20psychologie.doc",
   1225      "",
   1226      "iso-8859-1",
   1227      L"",
   1228      L"V\u00fdvojov\u00e1 psychologie.doc"},
   1229     // The filename encoding doesn't match the referrer charset, the
   1230     // system charset, or UTF-8.
   1231     // TODO(jshin): we need to handle this case.
   1232 #if 0
   1233     {"http://example.com/V%FDvojov%E1%20psychologie.doc",
   1234      "",
   1235      "utf-8",
   1236      L"",
   1237      L"V\u00fdvojov\u00e1 psychologie.doc",
   1238     },
   1239 #endif
   1240   };
   1241   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
   1242     std::wstring default_name = test_cases[i].default_filename;
   1243     string16 filename = GetSuggestedFilename(
   1244         GURL(test_cases[i].url), test_cases[i].content_disp_header,
   1245         test_cases[i].referrer_charset, WideToUTF16(default_name));
   1246     EXPECT_EQ(std::wstring(test_cases[i].expected_filename),
   1247               UTF16ToWide(filename))
   1248       << "Iteration " << i << ": " << test_cases[i].url;
   1249   }
   1250 }
   1251 
   1252 // This is currently a windows specific function.
   1253 #if defined(OS_WIN)
   1254 namespace {
   1255 
   1256 struct GetDirectoryListingEntryCase {
   1257   const wchar_t* name;
   1258   const char* raw_bytes;
   1259   bool is_dir;
   1260   int64 filesize;
   1261   base::Time time;
   1262   const char* expected;
   1263 };
   1264 
   1265 }  // namespace
   1266 TEST(NetUtilTest, GetDirectoryListingEntry) {
   1267   const GetDirectoryListingEntryCase test_cases[] = {
   1268     {L"Foo",
   1269      "",
   1270      false,
   1271      10000,
   1272      base::Time(),
   1273      "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
   1274     {L"quo\"tes",
   1275      "",
   1276      false,
   1277      10000,
   1278      base::Time(),
   1279      "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
   1280          "\n"},
   1281     {L"quo\"tes",
   1282      "quo\"tes",
   1283      false,
   1284      10000,
   1285      base::Time(),
   1286      "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
   1287          "\n"},
   1288     // U+D55C0 U+AE00. raw_bytes is empty (either a local file with
   1289     // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8
   1290     {L"\xD55C\xAE00.txt",
   1291      "",
   1292      false,
   1293      10000,
   1294      base::Time(),
   1295      "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
   1296          ",0,\"9.8 kB\",\"\");</script>\n"},
   1297     // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence:
   1298     // a local or remote file in EUC-KR.
   1299     {L"\xD55C\xAE00.txt",
   1300      "\xC7\xD1\xB1\xDB.txt",
   1301      false,
   1302      10000,
   1303      base::Time(),
   1304      "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
   1305          ",0,\"9.8 kB\",\"\");</script>\n"},
   1306   };
   1307 
   1308   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
   1309     const std::string results = GetDirectoryListingEntry(
   1310         WideToUTF16(test_cases[i].name),
   1311         test_cases[i].raw_bytes,
   1312         test_cases[i].is_dir,
   1313         test_cases[i].filesize,
   1314         test_cases[i].time);
   1315     EXPECT_EQ(test_cases[i].expected, results);
   1316   }
   1317 }
   1318 
   1319 #endif
   1320 
   1321 TEST(NetUtilTest, ParseHostAndPort) {
   1322   const struct {
   1323     const char* input;
   1324     bool success;
   1325     const char* expected_host;
   1326     int expected_port;
   1327   } tests[] = {
   1328     // Valid inputs:
   1329     {"foo:10", true, "foo", 10},
   1330     {"foo", true, "foo", -1},
   1331     {
   1332       "[1080:0:0:0:8:800:200C:4171]:11",
   1333       true,
   1334       "[1080:0:0:0:8:800:200C:4171]",
   1335       11,
   1336     },
   1337     // Invalid inputs:
   1338     {"foo:bar", false, "", -1},
   1339     {"foo:", false, "", -1},
   1340     {":", false, "", -1},
   1341     {":80", false, "", -1},
   1342     {"", false, "", -1},
   1343     {"porttoolong:300000", false, "", -1},
   1344     {"usrname@host", false, "", -1},
   1345     {"usrname:password@host", false, "", -1},
   1346     {":password@host", false, "", -1},
   1347     {":password@host:80", false, "", -1},
   1348     {":password@host", false, "", -1},
   1349     {"@host", false, "", -1},
   1350   };
   1351 
   1352   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
   1353     std::string host;
   1354     int port;
   1355     bool ok = ParseHostAndPort(tests[i].input, &host, &port);
   1356 
   1357     EXPECT_EQ(tests[i].success, ok);
   1358 
   1359     if (tests[i].success) {
   1360       EXPECT_EQ(tests[i].expected_host, host);
   1361       EXPECT_EQ(tests[i].expected_port, port);
   1362     }
   1363   }
   1364 }
   1365 
   1366 TEST(NetUtilTest, GetHostAndPort) {
   1367   const struct {
   1368     GURL url;
   1369     const char* expected_host_and_port;
   1370   } tests[] = {
   1371     { GURL("http://www.foo.com/x"), "www.foo.com:80"},
   1372     { GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
   1373 
   1374     // For IPv6 literals should always include the brackets.
   1375     { GURL("http://[1::2]/x"), "[1::2]:80"},
   1376     { GURL("http://[::a]:33/x"), "[::a]:33"},
   1377   };
   1378   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
   1379     std::string host_and_port = GetHostAndPort(tests[i].url);
   1380     EXPECT_EQ(std::string(tests[i].expected_host_and_port), host_and_port);
   1381   }
   1382 }
   1383 
   1384 TEST(NetUtilTest, GetHostAndOptionalPort) {
   1385   const struct {
   1386     GURL url;
   1387     const char* expected_host_and_port;
   1388   } tests[] = {
   1389     { GURL("http://www.foo.com/x"), "www.foo.com"},
   1390     { GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
   1391 
   1392     // For IPv6 literals should always include the brackets.
   1393     { GURL("http://[1::2]/x"), "[1::2]"},
   1394     { GURL("http://[::a]:33/x"), "[::a]:33"},
   1395   };
   1396   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
   1397     std::string host_and_port = GetHostAndOptionalPort(tests[i].url);
   1398     EXPECT_EQ(std::string(tests[i].expected_host_and_port), host_and_port);
   1399   }
   1400 }
   1401 
   1402 
   1403 TEST(NetUtilTest, NetAddressToString_IPv4) {
   1404   const struct {
   1405     uint8 addr[4];
   1406     const char* result;
   1407   } tests[] = {
   1408     {{0, 0, 0, 0}, "0.0.0.0"},
   1409     {{127, 0, 0, 1}, "127.0.0.1"},
   1410     {{192, 168, 0, 1}, "192.168.0.1"},
   1411   };
   1412 
   1413   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
   1414     const addrinfo* ai = GetIPv4Address(tests[i].addr, 80);
   1415     std::string result = NetAddressToString(ai);
   1416     EXPECT_EQ(std::string(tests[i].result), result);
   1417   }
   1418 }
   1419 
   1420 TEST(NetUtilTest, NetAddressToString_IPv6) {
   1421   const struct {
   1422     uint8 addr[16];
   1423     const char* result;
   1424   } tests[] = {
   1425     {{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
   1426       0x98, 0x76, 0x54, 0x32, 0x10},
   1427      "fedc:ba98:7654:3210:fedc:ba98:7654:3210"},
   1428   };
   1429 
   1430   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
   1431     const addrinfo* ai = GetIPv6Address(tests[i].addr, 80);
   1432     std::string result = NetAddressToString(ai);
   1433     // Allow NetAddressToString() to fail, in case the system doesn't
   1434     // support IPv6.
   1435     if (!result.empty())
   1436       EXPECT_EQ(std::string(tests[i].result), result);
   1437   }
   1438 }
   1439 
   1440 TEST(NetUtilTest, NetAddressToStringWithPort_IPv4) {
   1441   uint8 addr[] = {127, 0, 0, 1};
   1442   const addrinfo* ai = GetIPv4Address(addr, 166);
   1443   std::string result = NetAddressToStringWithPort(ai);
   1444   EXPECT_EQ("127.0.0.1:166", result);
   1445 }
   1446 
   1447 TEST(NetUtilTest, NetAddressToStringWithPort_IPv6) {
   1448   uint8 addr[] = {
   1449       0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
   1450       0x98, 0x76, 0x54, 0x32, 0x10
   1451   };
   1452   const addrinfo* ai = GetIPv6Address(addr, 361);
   1453   std::string result = NetAddressToStringWithPort(ai);
   1454 
   1455   // May fail on systems that don't support IPv6.
   1456   if (!result.empty())
   1457     EXPECT_EQ("[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:361", result);
   1458 }
   1459 
   1460 TEST(NetUtilTest, GetHostName) {
   1461   // We can't check the result of GetHostName() directly, since the result
   1462   // will differ across machines. Our goal here is to simply exercise the
   1463   // code path, and check that things "look about right".
   1464   std::string hostname = GetHostName();
   1465   EXPECT_FALSE(hostname.empty());
   1466 }
   1467 
   1468 TEST(NetUtilTest, FormatUrl) {
          // Table-driven check of FormatUrl(): every row is formatted and both the
          // resulting string and the reported prefix length are compared with the
          // row's expectations.
          // NOTE(review): UrlTestData is declared outside this chunk; judging by the
          // initializers and the loop below its fields are, in order: description,
          // input, languages, format_types, escape_rules, output, prefix_len.
   1469   FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
   1470   const UrlTestData tests[] = {
   1471     {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
   1472 
   1473     {"Simple URL",
   1474      "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
   1475      L"http://www.google.com/", 7},
   1476 
   1477     {"With a port number and a reference",
   1478      "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
   1479      UnescapeRule::NORMAL,
   1480      L"http://www.google.com:8080/#\x30B0", 7},
   1481 
   1482     // -------- IDN tests --------
   1483     {"Japanese IDN with ja",
   1484      "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
   1485      UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
   1486 
   1487     {"Japanese IDN with en",
   1488      "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
   1489      UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
   1490 
   1491     {"Japanese IDN without any languages",
   1492      "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
   1493      UnescapeRule::NORMAL,
   1494      // Single script is safe for empty languages.
   1495      L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
   1496 
   1497     {"mailto: with Japanese IDN",
   1498      "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
   1499      UnescapeRule::NORMAL,
   1500      // GURL doesn't assume an email address's domain part as a host name.
   1501      L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
   1502 
   1503     {"file: with Japanese IDN",
   1504      "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
   1505      UnescapeRule::NORMAL,
   1506      L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
   1507 
   1508     {"ftp: with Japanese IDN",
   1509      "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
   1510      UnescapeRule::NORMAL,
   1511      L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
   1512 
   1513     // -------- omit_username_password flag tests --------
   1514     {"With username and password, omit_username_password=false",
   1515      "http://user:passwd@example.com/foo", "",
   1516      kFormatUrlOmitNothing, UnescapeRule::NORMAL,
   1517      L"http://user:passwd@example.com/foo", 19},
   1518 
   1519     {"With username and password, omit_username_password=true",
   1520      "http://user:passwd@example.com/foo", "", default_format_type,
   1521      UnescapeRule::NORMAL, L"http://example.com/foo", 7},
   1522 
   1523     {"With username and no password",
   1524      "http://user@example.com/foo", "", default_format_type,
   1525      UnescapeRule::NORMAL, L"http://example.com/foo", 7},
   1526 
   1527     {"Just '@' without username and password",
   1528      "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
   1529      L"http://example.com/foo", 7},
   1530 
   1531     // GURL doesn't think local-part of an email address is username for URL.
   1532     {"mailto:, omit_username_password=true",
   1533      "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
   1534      L"mailto:foo@example.com", 7},
   1535 
   1536     // -------- unescape flag tests --------
   1537     {"Do not unescape",
   1538      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
   1539      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
   1540      "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
   1541      UnescapeRule::NONE,
   1542      // GURL parses %-encoded hostnames into Punycode.
   1543      L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
   1544      L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
   1545 
   1546     {"Unescape normally",
   1547      "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
   1548      "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
   1549      "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
   1550      UnescapeRule::NORMAL,
   1551      L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
   1552      L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
   1553 
   1554     {"Unescape normally including unescape spaces",
   1555      "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
   1556      UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
   1557 
   1558     /*
   1559     {"unescape=true with some special characters",
   1560     "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
   1561     kFormatUrlOmitNothing, UnescapeRule::NORMAL,
   1562     L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
   1563     */
   1564     // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
   1565 
   1566     // -------- omit http: --------
   1567     {"omit http with user name",
   1568      "http://user@example.com/foo", "", kFormatUrlOmitAll,
   1569      UnescapeRule::NORMAL, L"example.com/foo", 0},
   1570 
   1571     {"omit http",
   1572      "http://www.google.com/", "en", kFormatUrlOmitHTTP,
   1573      UnescapeRule::NORMAL, L"www.google.com/",
   1574      0},
   1575 
   1576     {"omit http with https",
   1577      "https://www.google.com/", "en", kFormatUrlOmitHTTP,
   1578      UnescapeRule::NORMAL, L"https://www.google.com/",
   1579      8},
   1580 
   1581     {"omit http starts with ftp.",
   1582      "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
   1583      UnescapeRule::NORMAL, L"http://ftp.google.com/",
   1584      7},
   1585 
   1586     // -------- omit trailing slash on bare hostname --------
   1587     {"omit slash when it's the entire path",
   1588      "http://www.google.com/", "en",
   1589      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
   1590      L"http://www.google.com", 7},
   1591     {"omit slash when there's a ref",
   1592      "http://www.google.com/#ref", "en",
   1593      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
   1594      L"http://www.google.com/#ref", 7},
   1595     {"omit slash when there's a query",
   1596      "http://www.google.com/?", "en",
   1597      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
   1598      L"http://www.google.com/?", 7},
   1599     {"omit slash when it's not the entire path",
   1600      "http://www.google.com/foo", "en",
   1601      kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
   1602      L"http://www.google.com/foo", 7},
   1603     {"omit slash for nonstandard URLs",
   1604      "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
   1605      UnescapeRule::NORMAL, L"data:/", 5},
   1606     {"omit slash for file URLs",
   1607      "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
   1608      UnescapeRule::NORMAL, L"file:///", 7},
   1609 
   1610     // -------- view-source: --------
   1611     {"view-source",
   1612      "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
   1613      UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
   1614      19},
   1615 
   1616     {"view-source of view-source",
   1617      "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
   1618      default_format_type, UnescapeRule::NORMAL,
   1619      L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
   1620 
   1621     // view-source should omit http and trailing slash where non-view-source
   1622     // would.
   1623     {"view-source omit http",
   1624      "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
   1625      UnescapeRule::NORMAL, L"view-source:a.b/c",
   1626      12},
   1627     {"view-source omit http starts with ftp.",
   1628      "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
   1629      UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
   1630      19},
   1631     {"view-source omit slash when it's the entire path",
   1632      "view-source:http://a.b/", "en", kFormatUrlOmitAll,
   1633      UnescapeRule::NORMAL, L"view-source:a.b",
   1634      12},
   1635   };
   1636 
          // Run every row; the row description is streamed into each expectation so
          // a failure identifies the offending case.
   1637   for (size_t i = 0; i < arraysize(tests); ++i) {
   1638     size_t prefix_len;
   1639     string16 formatted = FormatUrl(
   1640         GURL(tests[i].input), tests[i].languages, tests[i].format_types,
   1641         tests[i].escape_rules, NULL, &prefix_len, NULL);
   1642     EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
   1643     EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
   1644   }
   1645 }
   1646 
   1647 TEST(NetUtilTest, FormatUrlParsed) {
          // Checks the url_parse::Parsed structure that FormatUrl() fills in: after
          // each call, every component range in |parsed| is sliced out of the
          // *formatted* string and compared with the text expected for that
          // component. |parsed| and |formatted| are reused across all scenarios.
   1648   // No unescape case.
   1649   url_parse::Parsed parsed;
   1650   string16 formatted = FormatUrl(
   1651       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
   1652            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
   1653       "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
   1654       NULL);
   1655   EXPECT_EQ(WideToUTF16(
   1656       L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
   1657       L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
   1658   EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
   1659       formatted.substr(parsed.username.begin, parsed.username.len));
   1660   EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
   1661       formatted.substr(parsed.password.begin, parsed.password.len));
   1662   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
   1663       formatted.substr(parsed.host.begin, parsed.host.len));
   1664   EXPECT_EQ(WideToUTF16(L"8080"),
   1665       formatted.substr(parsed.port.begin, parsed.port.len));
   1666   EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
   1667       formatted.substr(parsed.path.begin, parsed.path.len));
   1668   EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
   1669       formatted.substr(parsed.query.begin, parsed.query.len));
   1670   EXPECT_EQ(WideToUTF16(L"\x30B0"),
   1671       formatted.substr(parsed.ref.begin, parsed.ref.len));
   1672 
   1673   // Unescape case.
   1674   formatted = FormatUrl(
   1675       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
   1676            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
   1677       "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
   1678       NULL);
   1679   EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
   1680       L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
   1681   EXPECT_EQ(WideToUTF16(L"\x30B0"),
   1682       formatted.substr(parsed.username.begin, parsed.username.len));
   1683   EXPECT_EQ(WideToUTF16(L"\x30FC"),
   1684       formatted.substr(parsed.password.begin, parsed.password.len));
   1685   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
   1686       formatted.substr(parsed.host.begin, parsed.host.len));
   1687   EXPECT_EQ(WideToUTF16(L"8080"),
   1688       formatted.substr(parsed.port.begin, parsed.port.len));
   1689   EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
   1690       formatted.substr(parsed.path.begin, parsed.path.len));
   1691   EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
   1692       formatted.substr(parsed.query.begin, parsed.query.len));
   1693   EXPECT_EQ(WideToUTF16(L"\x30B0"),
   1694       formatted.substr(parsed.ref.begin, parsed.ref.len));
   1695 
   1696   // Omit_username_password + unescape case.
          // The credentials are dropped from the output, so their components must be
          // reported as invalid rather than as empty ranges.
   1697   formatted = FormatUrl(
   1698       GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
   1699            "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
   1700       "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
   1701       NULL, NULL);
   1702   EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
   1703       L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
   1704   EXPECT_FALSE(parsed.username.is_valid());
   1705   EXPECT_FALSE(parsed.password.is_valid());
   1706   EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
   1707       formatted.substr(parsed.host.begin, parsed.host.len));
   1708   EXPECT_EQ(WideToUTF16(L"8080"),
   1709       formatted.substr(parsed.port.begin, parsed.port.len));
   1710   EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
   1711       formatted.substr(parsed.path.begin, parsed.path.len));
   1712   EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
   1713       formatted.substr(parsed.query.begin, parsed.query.len));
   1714   EXPECT_EQ(WideToUTF16(L"\x30B0"),
   1715       formatted.substr(parsed.ref.begin, parsed.ref.len));
   1716 
   1717   // View-source case.
          // The scheme component is expected to cover "view-source:http" as a whole.
   1718   formatted = FormatUrl(
   1719       GURL("view-source:http://user:passwd@host:81/path?query#ref"),
   1720       "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
   1721       NULL, NULL);
   1722   EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
   1723       formatted);
   1724   EXPECT_EQ(WideToUTF16(L"view-source:http"),
   1725       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
   1726   EXPECT_FALSE(parsed.username.is_valid());
   1727   EXPECT_FALSE(parsed.password.is_valid());
   1728   EXPECT_EQ(WideToUTF16(L"host"),
   1729       formatted.substr(parsed.host.begin, parsed.host.len));
   1730   EXPECT_EQ(WideToUTF16(L"81"),
   1731       formatted.substr(parsed.port.begin, parsed.port.len));
   1732   EXPECT_EQ(WideToUTF16(L"/path"),
   1733       formatted.substr(parsed.path.begin, parsed.path.len));
   1734   EXPECT_EQ(WideToUTF16(L"query"),
   1735       formatted.substr(parsed.query.begin, parsed.query.len));
   1736   EXPECT_EQ(WideToUTF16(L"ref"),
   1737       formatted.substr(parsed.ref.begin, parsed.ref.len));
   1738 
   1739   // omit http case.
          // With "http://" stripped from the output the scheme must be invalid.
   1740   formatted = FormatUrl(
   1741       GURL("http://host:8000/a?b=c#d"),
   1742       "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
   1743   EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
   1744   EXPECT_FALSE(parsed.scheme.is_valid());
   1745   EXPECT_FALSE(parsed.username.is_valid());
   1746   EXPECT_FALSE(parsed.password.is_valid());
   1747   EXPECT_EQ(WideToUTF16(L"host"),
   1748       formatted.substr(parsed.host.begin, parsed.host.len));
   1749   EXPECT_EQ(WideToUTF16(L"8000"),
   1750       formatted.substr(parsed.port.begin, parsed.port.len));
   1751   EXPECT_EQ(WideToUTF16(L"/a"),
   1752       formatted.substr(parsed.path.begin, parsed.path.len));
   1753   EXPECT_EQ(WideToUTF16(L"b=c"),
   1754       formatted.substr(parsed.query.begin, parsed.query.len));
   1755   EXPECT_EQ(WideToUTF16(L"d"),
   1756       formatted.substr(parsed.ref.begin, parsed.ref.len));
   1757 
   1758   // omit http starts with ftp case.
          // The expected output keeps "http://" for a host beginning with "ftp.", so
          // the scheme stays valid here.
   1759   formatted = FormatUrl(
   1760       GURL("http://ftp.host:8000/a?b=c#d"),
   1761       "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
   1762   EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
   1763   EXPECT_TRUE(parsed.scheme.is_valid());
   1764   EXPECT_FALSE(parsed.username.is_valid());
   1765   EXPECT_FALSE(parsed.password.is_valid());
   1766   EXPECT_EQ(WideToUTF16(L"http"),
   1767       formatted.substr(parsed.scheme.begin, parsed.scheme.len));
   1768   EXPECT_EQ(WideToUTF16(L"ftp.host"),
   1769       formatted.substr(parsed.host.begin, parsed.host.len));
   1770   EXPECT_EQ(WideToUTF16(L"8000"),
   1771       formatted.substr(parsed.port.begin, parsed.port.len));
   1772   EXPECT_EQ(WideToUTF16(L"/a"),
   1773       formatted.substr(parsed.path.begin, parsed.path.len));
   1774   EXPECT_EQ(WideToUTF16(L"b=c"),
   1775       formatted.substr(parsed.query.begin, parsed.query.len));
   1776   EXPECT_EQ(WideToUTF16(L"d"),
   1777       formatted.substr(parsed.ref.begin, parsed.ref.len));
   1778 
   1779   // omit http starts with 'f' case.
          // A host that merely starts with 'f' (not "ftp.") still loses "http://".
   1780   formatted = FormatUrl(
   1781       GURL("http://f/"),
   1782       "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
   1783   EXPECT_EQ(WideToUTF16(L"f/"), formatted);
   1784   EXPECT_FALSE(parsed.scheme.is_valid());
   1785   EXPECT_FALSE(parsed.username.is_valid());
   1786   EXPECT_FALSE(parsed.password.is_valid());
   1787   EXPECT_FALSE(parsed.port.is_valid());
   1788   EXPECT_TRUE(parsed.path.is_valid());
   1789   EXPECT_FALSE(parsed.query.is_valid());
   1790   EXPECT_FALSE(parsed.ref.is_valid());
   1791   EXPECT_EQ(WideToUTF16(L"f"),
   1792       formatted.substr(parsed.host.begin, parsed.host.len));
   1793   EXPECT_EQ(WideToUTF16(L"/"),
   1794       formatted.substr(parsed.path.begin, parsed.path.len));
   1795 }
   1796 
   1797 TEST(NetUtilTest, FormatUrlAdjustOffset) {
          // Checks how FormatUrl() (single offset) and FormatUrlWithOffsets() (vector
          // of offsets) remap character offsets from the input spec onto the
          // formatted string. Offsets that fall inside text the formatter removes or
          // collapses are expected to come back as string16::npos (kNpos).
          // NOTE(review): AdjustOffsetCase is declared outside this chunk; it is used
          // here as an {input_offset, output_offset} pair.

          // Nothing is removed from this URL, so in-range offsets map to themselves.
   1798   const AdjustOffsetCase basic_cases[] = {
   1799     {0, 0},
   1800     {3, 3},
   1801     {5, 5},
   1802     {6, 6},
   1803     {13, 13},
   1804     {21, 21},
   1805     {22, 22},
   1806     {23, 23},
   1807     {25, 25},
   1808     {26, string16::npos},
   1809     {500000, string16::npos},
   1810     {string16::npos, string16::npos},
   1811   };
   1812   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) {
   1813     size_t offset = basic_cases[i].input_offset;
   1814     FormatUrl(GURL("http://www.google.com/foo/"), "en",
   1815                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1816                    NULL, NULL, &offset);
   1817     EXPECT_EQ(basic_cases[i].output_offset, offset);
   1818   }
   1819 
          // Same URL through the multi-offset entry point; one extra offset equal to
          // the URL length is included and must become kNpos.
   1820   size_t url_size = 26;
   1821   std::vector<size_t> offsets;
   1822   for (size_t i = 0; i < url_size + 1; ++i)
   1823     offsets.push_back(i);
   1824   FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en",
   1825                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1826                        NULL, NULL, &offsets);
   1827   for (size_t i = 0; i < url_size; ++i)
   1828     EXPECT_EQ(i, offsets[i]);
   1829   EXPECT_EQ(kNpos, offsets[url_size]);
   1830 
          // Credentials are omitted: offsets inside "foo:bar@" / "foo@" become npos
          // and offsets after them shift left by the removed length.
   1831   const struct {
   1832     const char* input_url;
   1833     size_t input_offset;
   1834     size_t output_offset;
   1835   } omit_auth_cases[] = {
   1836     {"http://foo:bar@www.google.com/", 6, 6},
   1837     {"http://foo:bar@www.google.com/", 7, string16::npos},
   1838     {"http://foo:bar@www.google.com/", 8, string16::npos},
   1839     {"http://foo:bar@www.google.com/", 10, string16::npos},
   1840     {"http://foo:bar@www.google.com/", 11, string16::npos},
   1841     {"http://foo:bar@www.google.com/", 14, string16::npos},
   1842     {"http://foo:bar@www.google.com/", 15, 7},
   1843     {"http://foo:bar@www.google.com/", 25, 17},
   1844     {"http://foo@www.google.com/", 9, string16::npos},
   1845     {"http://foo@www.google.com/", 11, 7},
   1846   };
   1847   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) {
   1848     size_t offset = omit_auth_cases[i].input_offset;
   1849     FormatUrl(GURL(omit_auth_cases[i].input_url), "en",
   1850                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1851                    NULL, NULL, &offset);
   1852     EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
   1853   }
   1854 
   1855   url_size = 30;
   1856   offsets.clear();
   1857   for (size_t i = 0; i < url_size; ++i)
   1858     offsets.push_back(i);
   1859   FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en",
   1860                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1861                        NULL, NULL, &offsets);
   1862   for (size_t i = 0; i < 7; ++i)
   1863     EXPECT_EQ(i, offsets[i]);
   1864   for (size_t i = 7; i < 15; ++i)
   1865     EXPECT_EQ(kNpos, offsets[i]);
          // Start at 15 (the first host character, expected at 7 just like the
          // {15, 7} single-offset case above) so that no offset goes unchecked.
   1866   for (size_t i = 15; i < url_size; ++i)
   1867     EXPECT_EQ(i - 8, offsets[i]);
   1868 
          // view-source: the omitted "foo@" sits after the "view-source:http://"
          // prefix.
   1869   const AdjustOffsetCase view_source_cases[] = {
   1870     {0, 0},
   1871     {3, 3},
   1872     {11, 11},
   1873     {12, 12},
   1874     {13, 13},
   1875     {18, 18},
   1876     {19, string16::npos},
   1877     {20, string16::npos},
   1878     {23, 19},
   1879     {26, 22},
   1880     {string16::npos, string16::npos},
   1881   };
   1882   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) {
   1883     size_t offset = view_source_cases[i].input_offset;
   1884     FormatUrl(GURL("view-source:http://foo@www.google.com/"), "en",
   1885                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1886                    NULL, NULL, &offset);
   1887     EXPECT_EQ(view_source_cases[i].output_offset, offset);
   1888   }
   1889 
   1890   url_size = 38;
   1891   offsets.clear();
   1892   for (size_t i = 0; i < url_size; ++i)
   1893     offsets.push_back(i);
   1894   FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en",
   1895                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1896                        NULL, NULL, &offsets);
   1897   size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
   1898                        17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23,
   1899                        24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
   1900   ASSERT_EQ(url_size, arraysize(expected));
   1901   for (size_t i = 0; i < url_size; ++i)
   1902     EXPECT_EQ(expected[i], offsets[i]);
   1903 
          // IDN host: the punycode host is displayed in its shorter Unicode form, so
          // offsets inside it become npos and offsets after it shift left.
   1904   const AdjustOffsetCase idn_hostname_cases[] = {
   1905     {8, string16::npos},
   1906     {16, string16::npos},
   1907     {24, string16::npos},
   1908     {25, 12},
   1909     {30, 17},
   1910   };
   1911   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) {
   1912     size_t offset = idn_hostname_cases[i].input_offset;
   1913     // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/"
   1914     FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
   1915                    kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1916                    NULL, NULL, &offset);
   1917     EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
   1918   }
   1919 
   1920   url_size = 33;
   1921   offsets.clear();
   1922   for (size_t i = 0; i < url_size; ++i)
   1923     offsets.push_back(i);
   1924   FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
   1925                        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
   1926                        NULL, NULL, &offsets);
   1927   size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos,
   1928                          kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
   1929                          kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16,
   1930                          17, 18, 19};
   1931   ASSERT_EQ(url_size, arraysize(expected_1));
   1932   for (size_t i = 0; i < url_size; ++i)
   1933     EXPECT_EQ(expected_1[i], offsets[i]);
   1934 
          // Unescaping: each %-escape sequence collapses to one character, so only
          // the offset at the start of a sequence survives.
   1935   const AdjustOffsetCase unescape_cases[] = {
   1936     {25, 25},
   1937     {26, string16::npos},
   1938     {27, string16::npos},
   1939     {28, 26},
   1940     {35, string16::npos},
   1941     {41, 31},
   1942     {59, 33},
   1943     {60, string16::npos},
   1944     {67, string16::npos},
   1945     {68, string16::npos},
   1946   };
   1947   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) {
   1948     size_t offset = unescape_cases[i].input_offset;
   1949     // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB"
   1950     FormatUrl(GURL(
   1951         "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
   1952         "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL,
   1953         NULL, &offset);
   1954     EXPECT_EQ(unescape_cases[i].output_offset, offset);
   1955   }
   1956 
   1957   url_size = 68;
   1958   offsets.clear();
   1959   for (size_t i = 0; i < url_size; ++i)
   1960     offsets.push_back(i);
   1961   FormatUrlWithOffsets(GURL(
   1962       "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
   1963       "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL,
   1964       &offsets);
   1965   size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
   1966                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos,
   1967                          26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos,
   1968                          kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos,
   1969                          kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos,
   1970                          kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos,
   1971                          kNpos, kNpos, kNpos, kNpos, kNpos, kNpos};
   1972   ASSERT_EQ(url_size, arraysize(expected_2));
   1973   for (size_t i = 0; i < url_size; ++i)
   1974     EXPECT_EQ(expected_2[i], offsets[i]);
   1975 
          // Reference fragment containing multi-byte characters.
   1976   const AdjustOffsetCase ref_cases[] = {
   1977     {30, 30},
   1978     {31, 31},
   1979     {32, string16::npos},
   1980     {34, 32},
   1981     {35, string16::npos},
   1982     {37, 33},
   1983     {38, string16::npos},
   1984   };
   1985   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) {
   1986     size_t offset = ref_cases[i].input_offset;
   1987     // "http://www.google.com/foo.html#\x30B0\x30B0z"
   1988     FormatUrl(GURL(
   1989         "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
   1990         kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
   1991         &offset);
   1992     EXPECT_EQ(ref_cases[i].output_offset, offset);
   1993   }
   1994 
   1995   url_size = 38;
   1996   offsets.clear();
   1997   for (size_t i = 0; i < url_size; ++i)
   1998     offsets.push_back(i);
   1999   // "http://www.google.com/foo.html#\x30B0\x30B0z"
   2000   FormatUrlWithOffsets(GURL(
   2001       "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
   2002       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
   2003       &offsets);
   2004   size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
   2005                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
   2006                          30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33};
   2007   ASSERT_EQ(url_size, arraysize(expected_3));
   2008   for (size_t i = 0; i < url_size; ++i)
   2009     EXPECT_EQ(expected_3[i], offsets[i]);
   2010 
          // "http://" is omitted: offsets inside it become npos, the rest shift by 7.
   2011   const AdjustOffsetCase omit_http_cases[] = {
   2012     {0, string16::npos},
   2013     {3, string16::npos},
   2014     {7, 0},
   2015     {8, 1},
   2016   };
   2017   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_cases); ++i) {
   2018     size_t offset = omit_http_cases[i].input_offset;
   2019     FormatUrl(GURL("http://www.google.com"), "en",
   2020         kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
   2021     EXPECT_EQ(omit_http_cases[i].output_offset, offset);
   2022   }
   2023 
   2024   url_size = 23;
   2025   offsets.clear();
   2026   for (size_t i = 0; i < url_size; ++i)
   2027     offsets.push_back(i);
   2028   FormatUrlWithOffsets(GURL("http://www.google.com"), "en",
   2029       kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
   2030   size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1,
   2031                          2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos};
   2032   ASSERT_EQ(url_size, arraysize(expected_4));
   2033   for (size_t i = 0; i < url_size; ++i)
   2034     EXPECT_EQ(expected_4[i], offsets[i]);
   2035 
          // Host starts with "ftp.": the expectations show offsets left unchanged.
   2036   const AdjustOffsetCase omit_http_start_with_ftp[] = {
   2037     {0, 0},
   2038     {3, 3},
   2039     {8, 8},
   2040   };
   2041   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_start_with_ftp); ++i) {
   2042     size_t offset = omit_http_start_with_ftp[i].input_offset;
   2043     FormatUrl(GURL("http://ftp.google.com"), "en",
   2044         kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
   2045     EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset);
   2046   }
   2047 
   2048   url_size = 23;
   2049   offsets.clear();
   2050   for (size_t i = 0; i < url_size; ++i)
   2051     offsets.push_back(i);
   2052   FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en",
   2053       kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
   2054   size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
   2055                          16, 17, 18, 19, 20, 21, kNpos};
   2056   ASSERT_EQ(url_size, arraysize(expected_5));
   2057   for (size_t i = 0; i < url_size; ++i)
   2058     EXPECT_EQ(expected_5[i], offsets[i]);
   2059 
          // kFormatUrlOmitAll: the whole "http://user@" prefix (12 characters) goes.
   2060   const AdjustOffsetCase omit_all_cases[] = {
   2061     {12, 0},
   2062     {13, 1},
   2063     {0, string16::npos},
   2064     {3, string16::npos},
   2065   };
   2066   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_all_cases); ++i) {
   2067     size_t offset = omit_all_cases[i].input_offset;
   2068     FormatUrl(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
   2069                    UnescapeRule::NORMAL, NULL, NULL, &offset);
   2070     EXPECT_EQ(omit_all_cases[i].output_offset, offset);
   2071   }
   2072 
   2073   url_size = 21;
   2074   offsets.clear();
   2075   for (size_t i = 0; i < url_size; ++i)
   2076     offsets.push_back(i);
   2077   FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
   2078                        UnescapeRule::NORMAL, NULL, NULL, &offsets);
   2079   size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
   2080                          kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7,
   2081                          kNpos};
   2082   ASSERT_EQ(url_size, arraysize(expected_6));
   2083   for (size_t i = 0; i < url_size; ++i)
   2084     EXPECT_EQ(expected_6[i], offsets[i]);
   2085 }
   2086 
   2087 TEST(NetUtilTest, SimplifyUrlForRequest) {
          // Each row pairs an input URL with the URL SimplifyUrlForRequest() is
          // expected to return for it. Both strings go through GURL before the
          // comparison.
   2088   struct {
   2089     const char* input_url;
   2090     const char* expected_simplified_url;
   2091   } tests[] = {
   2092     {
   2093       // Reference section should be stripped.
   2094       "http://www.google.com:78/foobar?query=1#hash",
   2095       "http://www.google.com:78/foobar?query=1",
   2096     },
   2097     {
   2098       // Reference section can itself contain #.
   2099       "http://192.168.0.1?query=1#hash#10#11#13#14",
   2100       "http://192.168.0.1?query=1",
   2101     },
   2102     { // Strip username/password.
   2103       "http://user:pass@google.com",
   2104       "http://google.com/",
   2105     },
   2106     { // Strip both the reference and the username/password.
   2107       "http://user:pass@google.com:80/sup?yo#X#X",
   2108       "http://google.com/sup?yo",
   2109     },
   2110     { // Try an HTTPS URL -- strip both the reference and the username/password.
   2111       "https://user:pass@google.com:80/sup?yo#X#X",
   2112       "https://google.com:80/sup?yo",
   2113     },
   2114     { // Try an FTP URL -- strip both the reference and the username/password.
   2115       "ftp://user:pass@google.com:80/sup?yo#X#X",
   2116       "ftp://google.com:80/sup?yo",
   2117     },
   2118     { // Try a nonstandard URL -- it is expected to come back unchanged.
   2119       "foobar://user:pass@google.com:80/sup?yo#X#X",
   2120       "foobar://user:pass@google.com:80/sup?yo#X#X",
   2121     },
   2122   };
   2123   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
            // Tag any failure below with the row index and its input URL.
   2124     SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", i,
   2125                                     tests[i].input_url));
   2126     GURL input_url(tests[i].input_url);
   2127     GURL expected_url(tests[i].expected_simplified_url);
   2128     EXPECT_EQ(expected_url, SimplifyUrlForRequest(input_url));
   2129   }
   2130 }
   2131 
   2132 TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) {
   2133   // Malformed lists are expected to leave explicitly_allowed_ports empty.
   2134   std::string rejected[] = { "1,2,a", "'1','2'", "1, 2, 3", "1 0,11,12" };
   2135   for (size_t bad = 0; bad < ARRAYSIZE_UNSAFE(rejected); ++bad) {
   2136     SetExplicitlyAllowedPorts(rejected[bad]);
   2137     EXPECT_EQ(0, static_cast<int>(explicitly_allowed_ports.size()));
   2138   }
   2139 
   2140   // Entry |n| below lists exactly |n| ports, so the index is the expected size.
   2141   std::string accepted[] = { "", "1", "1,2", "1,2,3", "10,11,12,13" };
   2142   for (size_t n = 0; n < ARRAYSIZE_UNSAFE(accepted); ++n) {
   2143     SetExplicitlyAllowedPorts(accepted[n]);
   2144     EXPECT_EQ(n, explicitly_allowed_ports.size());
   2145   }
   2146 }
   2146 
   2147 TEST(NetUtilTest, GetHostOrSpecFromURL) {
          // Expectations: a URL with a host yields that host (without a trailing
          // dot); the file URL below yields its whole spec instead.
   2148   const GURL plain_host("http://example.com/test");
   2149   const GURL dotted_host("http://example.com./test");
   2150   const GURL file_url("file:///tmp/test.html");
   2151   EXPECT_EQ("example.com", GetHostOrSpecFromURL(plain_host));
   2152   EXPECT_EQ("example.com", GetHostOrSpecFromURL(dotted_host));
   2153   EXPECT_EQ("file:///tmp/test.html", GetHostOrSpecFromURL(file_url));
   2154 }
   2155 
   2156 // Every malformed literal listed here must be rejected by the parser.
   2157 TEST(NetUtilTest, ParseIPLiteralToNumber_FailParse) {
   2158   const char* const invalid_literals[] = {
   2159     "bad value", "bad:value", "", "192.168.0.1:30", "  192.168.0.1  ", "[::1]"
   2160   };
   2161   IPAddressNumber scratch;
   2162   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(invalid_literals); ++i) {
   2163     EXPECT_FALSE(ParseIPLiteralToNumber(invalid_literals[i], &scratch))
   2164         << invalid_literals[i];
   2165   }
   2166 }
   2167 
   2168 // Test parsing an IPv4 literal.
   2169 TEST(NetUtilTest, ParseIPLiteralToNumber_IPv4) {
   2170   IPAddressNumber number;
   2171   EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &number));
   2172   EXPECT_EQ("192,168,0,1", DumpIPNumber(number));
   2173 }
   2174 
   2175 // Test parsing an IPv6 literal.
   2176 TEST(NetUtilTest, ParseIPLiteralToNumber_IPv6) {
   2177   IPAddressNumber number;
   2178   EXPECT_TRUE(ParseIPLiteralToNumber("1:abcd::3:4:ff", &number));
   2179   EXPECT_EQ("0,1,171,205,0,0,0,0,0,0,0,3,0,4,0,255", DumpIPNumber(number));
   2180 }
   2181 
   2182 // Test mapping an IPv4 address to an IPv6 address.
   2183 TEST(NetUtilTest, ConvertIPv4NumberToIPv6Number) {
   2184   IPAddressNumber ipv4_number;
   2185   EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &ipv4_number));
   2186 
   2187   IPAddressNumber ipv6_number =
   2188       ConvertIPv4NumberToIPv6Number(ipv4_number);
   2189 
   2190   // ::ffff:192.168.1.1
   2191   EXPECT_EQ("0,0,0,0,0,0,0,0,0,0,255,255,192,168,0,1",
   2192             DumpIPNumber(ipv6_number));
   2193 }
   2194 
   2195 // Test parsing invalid CIDR notation literals.
   2196 TEST(NetUtilTest, ParseCIDRBlock_Invalid) {
   2197   const char* bad_literals[] = {
   2198       "foobar",
   2199       "",
   2200       "192.168.0.1",
   2201       "::1",
   2202       "/",
   2203       "/1",
   2204       "1",
   2205       "192.168.1.1/-1",
   2206       "192.168.1.1/33",
   2207       "::1/-3",
   2208       "a::3/129",
   2209       "::1/x",
   2210       "192.168.0.1//11"
   2211   };
   2212 
   2213   for (size_t i = 0; i < arraysize(bad_literals); ++i) {
   2214     IPAddressNumber ip_number;
   2215     size_t prefix_length_in_bits;
   2216 
   2217     EXPECT_FALSE(ParseCIDRBlock(bad_literals[i],
   2218                                      &ip_number,
   2219                                      &prefix_length_in_bits));
   2220   }
   2221 }
   2222 
   2223 // Test parsing a valid CIDR notation literal.
   2224 TEST(NetUtilTest, ParseCIDRBlock_Valid) {
   2225   IPAddressNumber ip_number;
   2226   size_t prefix_length_in_bits;
   2227 
   2228   EXPECT_TRUE(ParseCIDRBlock("192.168.0.1/11",
   2229                                   &ip_number,
   2230                                   &prefix_length_in_bits));
   2231 
   2232   EXPECT_EQ("192,168,0,1", DumpIPNumber(ip_number));
   2233   EXPECT_EQ(11u, prefix_length_in_bits);
   2234 }
   2235 
   2236 TEST(NetUtilTest, IPNumberMatchesPrefix) {
   2237   struct {
   2238     const char* cidr_literal;
   2239     const char* ip_literal;
   2240     bool expected_to_match;
   2241   } tests[] = {
   2242     // IPv4 prefix with IPv4 inputs.
   2243     {
   2244       "10.10.1.32/27",
   2245       "10.10.1.44",
   2246       true
   2247     },
   2248     {
   2249       "10.10.1.32/27",
   2250       "10.10.1.90",
   2251       false
   2252     },
   2253     {
   2254       "10.10.1.32/27",
   2255       "10.10.1.90",
   2256       false
   2257     },
   2258 
   2259     // IPv6 prefix with IPv6 inputs.
   2260     {
   2261       "2001:db8::/32",
   2262       "2001:DB8:3:4::5",
   2263       true
   2264     },
   2265     {
   2266       "2001:db8::/32",
   2267       "2001:c8::",
   2268       false
   2269     },
   2270 
   2271     // IPv6 prefix with IPv4 inputs.
   2272     {
   2273       "2001:db8::/33",
   2274       "192.168.0.1",
   2275       false
   2276     },
   2277     {
   2278       "::ffff:192.168.0.1/112",
   2279       "192.168.33.77",
   2280       true
   2281     },
   2282 
   2283     // IPv4 prefix with IPv6 inputs.
   2284     {
   2285       "10.11.33.44/16",
   2286       "::ffff:0a0b:89",
   2287       true
   2288     },
   2289     {
   2290       "10.11.33.44/16",
   2291       "::ffff:10.12.33.44",
   2292       false
   2293     },
   2294   };
   2295   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
   2296     SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s, %s", i,
   2297                                     tests[i].cidr_literal,
   2298                                     tests[i].ip_literal));
   2299 
   2300     IPAddressNumber ip_number;
   2301     EXPECT_TRUE(ParseIPLiteralToNumber(tests[i].ip_literal, &ip_number));
   2302 
   2303     IPAddressNumber ip_prefix;
   2304     size_t prefix_length_in_bits;
   2305 
   2306     EXPECT_TRUE(ParseCIDRBlock(tests[i].cidr_literal,
   2307                                &ip_prefix,
   2308                                &prefix_length_in_bits));
   2309 
   2310     EXPECT_EQ(tests[i].expected_to_match,
   2311               IPNumberMatchesPrefix(ip_number,
   2312                                     ip_prefix,
   2313                                     prefix_length_in_bits));
   2314   }
   2315 }
   2316 
   2317 TEST(NetUtilTest, IsLocalhost) {
   2318   EXPECT_TRUE(net::IsLocalhost("localhost"));
   2319   EXPECT_TRUE(net::IsLocalhost("localhost.localdomain"));
   2320   EXPECT_TRUE(net::IsLocalhost("localhost6"));
   2321   EXPECT_TRUE(net::IsLocalhost("localhost6.localdomain6"));
   2322   EXPECT_TRUE(net::IsLocalhost("127.0.0.1"));
   2323   EXPECT_TRUE(net::IsLocalhost("127.0.1.0"));
   2324   EXPECT_TRUE(net::IsLocalhost("127.1.0.0"));
   2325   EXPECT_TRUE(net::IsLocalhost("127.0.0.255"));
   2326   EXPECT_TRUE(net::IsLocalhost("127.0.255.0"));
   2327   EXPECT_TRUE(net::IsLocalhost("127.255.0.0"));
   2328   EXPECT_TRUE(net::IsLocalhost("::1"));
   2329   EXPECT_TRUE(net::IsLocalhost("0:0:0:0:0:0:0:1"));
   2330 
   2331   EXPECT_FALSE(net::IsLocalhost("localhostx"));
   2332   EXPECT_FALSE(net::IsLocalhost("foo.localdomain"));
   2333   EXPECT_FALSE(net::IsLocalhost("localhost6x"));
   2334   EXPECT_FALSE(net::IsLocalhost("localhost.localdomain6"));
   2335   EXPECT_FALSE(net::IsLocalhost("localhost6.localdomain"));
   2336   EXPECT_FALSE(net::IsLocalhost("127.0.0.1.1"));
   2337   EXPECT_FALSE(net::IsLocalhost(".127.0.0.255"));
   2338   EXPECT_FALSE(net::IsLocalhost("::2"));
   2339   EXPECT_FALSE(net::IsLocalhost("::1:1"));
   2340   EXPECT_FALSE(net::IsLocalhost("0:0:0:0:1:0:0:1"));
   2341   EXPECT_FALSE(net::IsLocalhost("::1:1"));
   2342   EXPECT_FALSE(net::IsLocalhost("0:0:0:0:0:0:0:0:1"));
   2343 }
   2344 
   2345 // Verify GetNetworkList().
   2346 TEST(NetUtilTest, GetNetworkList) {
   2347   NetworkInterfaceList list;
   2348   ASSERT_TRUE(GetNetworkList(&list));
   2349 
   2350   for (NetworkInterfaceList::iterator it = list.begin();
   2351        it != list.end(); ++it) {
   2352     // Verify that the name is not empty.
   2353     EXPECT_FALSE(it->name.empty());
   2354 
   2355     // Verify that the address is correct.
   2356     EXPECT_TRUE(it->address.size() == kIPv4AddressSize ||
   2357                 it->address.size() == kIPv6AddressSize)
   2358         << "Invalid address of size " << it->address.size();
   2359     bool all_zeroes = true;
   2360     for (size_t i = 0; i < it->address.size(); ++i) {
   2361       if (it->address[i] != 0) {
   2362         all_zeroes = false;
   2363         break;
   2364       }
   2365     }
   2366     EXPECT_FALSE(all_zeroes);
   2367   }
   2368 }
   2369 
   2370 TEST(NetUtilTest, AdjustComponentOffset) {
   2371   std::vector<size_t> old_offsets;
   2372   for (size_t i = 0; i < 10; ++i)
   2373     old_offsets.push_back(i);
   2374   std::vector<size_t> new_offsets;
   2375   std::transform(old_offsets.begin(),
   2376                  old_offsets.end(),
   2377                  std::back_inserter(new_offsets),
   2378                  ClampComponentOffset(5));
   2379   size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos, 5, 6, 7, 8, 9};
   2380   EXPECT_EQ(new_offsets.size(), arraysize(expected_1));
   2381   EXPECT_EQ(new_offsets.size(), old_offsets.size());
   2382   for (size_t i = 0; i < arraysize(expected_1); ++i)
   2383     EXPECT_EQ(expected_1[i], new_offsets[i]);
   2384 }
   2385 
   2386 }  // namespace net
   2387