Home | History | Annotate | Download | only in net
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <stdlib.h>
      6 
      7 #include "base/basictypes.h"
      8 #include "base/file_util.h"
      9 #include "base/path_service.h"
     10 #include "base/string_util.h"
     11 #include "base/utf_string_conversions.h"
     12 #include "chrome/browser/net/url_fixer_upper.h"
     13 #include "chrome/common/chrome_paths.h"
     14 #include "googleurl/src/url_parse.h"
     15 #include "googleurl/src/gurl.h"
     16 #include "net/base/net_util.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 namespace {
     20   class URLFixerUpperTest : public testing::Test {
     21   };
     22 };
     23 
     24 namespace url_parse {
     25 
     26 std::ostream& operator<<(std::ostream& os, const Component& part) {
     27   return os << "(begin=" << part.begin << ", len=" << part.len << ")";
     28 }
     29 
     30 }  // namespace url_parse
     31 
// One table entry for the SegmentURL test: the text handed to
// URLFixerUpper::SegmentURL, the string it is expected to return, and the
// Component expected for each field of the url_parse::Parsed output.
struct segment_case {
  const std::string input;    // text passed to SegmentURL
  const std::string result;   // expected return value of SegmentURL
  const url_parse::Component scheme;    // expected parts.scheme
  const url_parse::Component username;  // expected parts.username
  const url_parse::Component password;  // expected parts.password
  const url_parse::Component host;      // expected parts.host
  const url_parse::Component port;      // expected parts.port
  const url_parse::Component path;      // expected parts.path
  const url_parse::Component query;     // expected parts.query
  const url_parse::Component ref;       // expected parts.ref
};
     44 
     45 static const segment_case segment_cases[] = {
     46   { "http://www.google.com/", "http",
     47     url_parse::Component(0, 4), // scheme
     48     url_parse::Component(), // username
     49     url_parse::Component(), // password
     50     url_parse::Component(7, 14), // host
     51     url_parse::Component(), // port
     52     url_parse::Component(21, 1), // path
     53     url_parse::Component(), // query
     54     url_parse::Component(), // ref
     55   },
     56   { "aBoUt:vErSiOn", "about",
     57     url_parse::Component(0, 5), // scheme
     58     url_parse::Component(), // username
     59     url_parse::Component(), // password
     60     url_parse::Component(), // host
     61     url_parse::Component(), // port
     62     url_parse::Component(), // path
     63     url_parse::Component(), // query
     64     url_parse::Component(), // ref
     65   },
     66   { "    www.google.com:124?foo#", "http",
     67     url_parse::Component(), // scheme
     68     url_parse::Component(), // username
     69     url_parse::Component(), // password
     70     url_parse::Component(4, 14), // host
     71     url_parse::Component(19, 3), // port
     72     url_parse::Component(), // path
     73     url_parse::Component(23, 3), // query
     74     url_parse::Component(27, 0), // ref
     75   },
     76   { "user (at) www.google.com", "http",
     77     url_parse::Component(), // scheme
     78     url_parse::Component(0, 4), // username
     79     url_parse::Component(), // password
     80     url_parse::Component(5, 14), // host
     81     url_parse::Component(), // port
     82     url_parse::Component(), // path
     83     url_parse::Component(), // query
     84     url_parse::Component(), // ref
     85   },
     86   { "ftp:/user:P:a$$Wd (at) ..ftp.google.com...::23///pub?foo#bar", "ftp",
     87     url_parse::Component(0, 3), // scheme
     88     url_parse::Component(5, 4), // username
     89     url_parse::Component(10, 7), // password
     90     url_parse::Component(18, 20), // host
     91     url_parse::Component(39, 2), // port
     92     url_parse::Component(41, 6), // path
     93     url_parse::Component(48, 3), // query
     94     url_parse::Component(52, 3), // ref
     95   },
     96   { "[2001:db8::1]/path", "http",
     97     url_parse::Component(), // scheme
     98     url_parse::Component(), // username
     99     url_parse::Component(), // password
    100     url_parse::Component(0, 13), // host
    101     url_parse::Component(), // port
    102     url_parse::Component(13, 5), // path
    103     url_parse::Component(), // query
    104     url_parse::Component(), // ref
    105   },
    106   { "[::1]", "http",
    107     url_parse::Component(), // scheme
    108     url_parse::Component(), // username
    109     url_parse::Component(), // password
    110     url_parse::Component(0, 5), // host
    111     url_parse::Component(), // port
    112     url_parse::Component(), // path
    113     url_parse::Component(), // query
    114     url_parse::Component(), // ref
    115   },
    116   // Incomplete IPv6 addresses (will not canonicalize).
    117   { "[2001:4860:", "http",
    118     url_parse::Component(), // scheme
    119     url_parse::Component(), // username
    120     url_parse::Component(), // password
    121     url_parse::Component(0, 11), // host
    122     url_parse::Component(), // port
    123     url_parse::Component(), // path
    124     url_parse::Component(), // query
    125     url_parse::Component(), // ref
    126   },
    127   { "[2001:4860:/foo", "http",
    128     url_parse::Component(), // scheme
    129     url_parse::Component(), // username
    130     url_parse::Component(), // password
    131     url_parse::Component(0, 11), // host
    132     url_parse::Component(), // port
    133     url_parse::Component(11, 4), // path
    134     url_parse::Component(), // query
    135     url_parse::Component(), // ref
    136   },
    137   { "http://:b005::68]", "http",
    138     url_parse::Component(0, 4), // scheme
    139     url_parse::Component(), // username
    140     url_parse::Component(), // password
    141     url_parse::Component(7, 10), // host
    142     url_parse::Component(), // port
    143     url_parse::Component(), // path
    144     url_parse::Component(), // query
    145     url_parse::Component(), // ref
    146   },
    147   // Can't do anything useful with this.
    148   { ":b005::68]", "",
    149     url_parse::Component(0, 0), // scheme
    150     url_parse::Component(), // username
    151     url_parse::Component(), // password
    152     url_parse::Component(), // host
    153     url_parse::Component(), // port
    154     url_parse::Component(), // path
    155     url_parse::Component(), // query
    156     url_parse::Component(), // ref
    157   },
    158 };
    159 
    160 TEST(URLFixerUpperTest, SegmentURL) {
    161   std::string result;
    162   url_parse::Parsed parts;
    163 
    164   for (size_t i = 0; i < arraysize(segment_cases); ++i) {
    165     segment_case value = segment_cases[i];
    166     result = URLFixerUpper::SegmentURL(value.input, &parts);
    167     EXPECT_EQ(value.result, result);
    168     EXPECT_EQ(value.scheme, parts.scheme);
    169     EXPECT_EQ(value.username, parts.username);
    170     EXPECT_EQ(value.password, parts.password);
    171     EXPECT_EQ(value.host, parts.host);
    172     EXPECT_EQ(value.port, parts.port);
    173     EXPECT_EQ(value.path, parts.path);
    174     EXPECT_EQ(value.query, parts.query);
    175     EXPECT_EQ(value.ref, parts.ref);
    176   }
    177 }
    178 
    179 // Creates a file and returns its full name as well as the decomposed
    180 // version. Example:
    181 //    full_path = "c:\foo\bar.txt"
    182 //    dir = "c:\foo"
    183 //    file_name = "bar.txt"
    184 static bool MakeTempFile(const FilePath& dir,
    185                          const FilePath& file_name,
    186                          FilePath* full_path) {
    187   *full_path = dir.Append(file_name);
    188   return file_util::WriteFile(*full_path, "", 0) == 0;
    189 }
    190 
    191 // Returns true if the given URL is a file: URL that matches the given file
    192 static bool IsMatchingFileURL(const std::string& url,
    193                               const FilePath& full_file_path) {
    194   if (url.length() <= 8)
    195     return false;
    196   if (std::string("file:///") != url.substr(0, 8))
    197     return false; // no file:/// prefix
    198   if (url.find('\\') != std::string::npos)
    199     return false; // contains backslashes
    200 
    201   FilePath derived_path;
    202   net::FileURLToFilePath(GURL(url), &derived_path);
    203 
    204   return FilePath::CompareEqualIgnoreCase(derived_path.value(),
    205                                           full_file_path.value());
    206 }
    207 
    208 struct fixup_case {
    209   const std::string input;
    210   const std::string desired_tld;
    211   const std::string output;
    212 } fixup_cases[] = {
    213   {"www.google.com", "", "http://www.google.com/"},
    214   {" www.google.com     ", "", "http://www.google.com/"},
    215   {" foo.com/asdf  bar", "", "http://foo.com/asdf%20%20bar"},
    216   {"..www.google.com..", "", "http://www.google.com./"},
    217   {"http://......", "", "http://....../"},
    218   {"http://host.com:ninety-two/", "", "http://host.com:ninety-two/"},
    219   {"http://host.com:ninety-two?foo", "", "http://host.com:ninety-two/?foo"},
    220   {"google.com:123", "", "http://google.com:123/"},
    221   {"about:", "", "about:"},
    222   {"about:version", "", "about:version"},
    223   {"www:123", "", "http://www:123/"},
    224   {"   www:123", "", "http://www:123/"},
    225   {"www.google.com?foo", "", "http://www.google.com/?foo"},
    226   {"www.google.com#foo", "", "http://www.google.com/#foo"},
    227   {"www.google.com?", "", "http://www.google.com/?"},
    228   {"www.google.com#", "", "http://www.google.com/#"},
    229   {"www.google.com:123?foo#bar", "", "http://www.google.com:123/?foo#bar"},
    230   {"user (at) www.google.com", "", "http://user@www.google.com/"},
    231   {"\xE6\xB0\xB4.com" , "", "http://xn--1rw.com/"},
    232   // It would be better if this next case got treated as http, but I don't see
    233   // a clean way to guess this isn't the new-and-exciting "user" scheme.
    234   {"user:passwd (at) www.google.com:8080/", "", "user:passwd (at) www.google.com:8080/"},
    235   // {"file:///c:/foo/bar%20baz.txt", "", "file:///C:/foo/bar%20baz.txt"},
    236   {"ftp.google.com", "", "ftp://ftp.google.com/"},
    237   {"    ftp.google.com", "", "ftp://ftp.google.com/"},
    238   {"FTP.GooGle.com", "", "ftp://ftp.google.com/"},
    239   {"ftpblah.google.com", "", "http://ftpblah.google.com/"},
    240   {"ftp", "", "http://ftp/"},
    241   {"google.ftp.com", "", "http://google.ftp.com/"},
    242   // URLs which end with 0x85 (NEL in ISO-8859).
    243   { "http://google.com/search?q=\xd0\x85", "",
    244     "http://google.com/search?q=%D0%85"
    245   },
    246   { "http://google.com/search?q=\xec\x97\x85", "",
    247     "http://google.com/search?q=%EC%97%85"
    248   },
    249   { "http://google.com/search?q=\xf0\x90\x80\x85", "",
    250     "http://google.com/search?q=%F0%90%80%85"
    251   },
    252   // URLs which end with 0xA0 (non-break space in ISO-8859).
    253   { "http://google.com/search?q=\xd0\xa0", "",
    254     "http://google.com/search?q=%D0%A0"
    255   },
    256   { "http://google.com/search?q=\xec\x97\xa0", "",
    257     "http://google.com/search?q=%EC%97%A0"
    258   },
    259   { "http://google.com/search?q=\xf0\x90\x80\xa0", "",
    260     "http://google.com/search?q=%F0%90%80%A0"
    261   },
    262   // URLs containing IPv6 literals.
    263   {"[2001:db8::2]", "", "http://[2001:db8::2]/"},
    264   {"[::]:80", "", "http://[::]/"},
    265   {"[::]:80/path", "", "http://[::]/path"},
    266   {"[::]:180/path", "", "http://[::]:180/path"},
    267   // TODO(pmarks): Maybe we should parse bare IPv6 literals someday.
    268   {"::1", "", "::1"},
    269 };
    270 
    271 TEST(URLFixerUpperTest, FixupURL) {
    272   for (size_t i = 0; i < arraysize(fixup_cases); ++i) {
    273     fixup_case value = fixup_cases[i];
    274     EXPECT_EQ(value.output, URLFixerUpper::FixupURL(value.input,
    275         value.desired_tld).possibly_invalid_spec());
    276   }
    277 
    278   // Check the TLD-appending functionality
    279   fixup_case tld_cases[] = {
    280     {"google", "com", "http://www.google.com/"},
    281     {"google.", "com", "http://www.google.com/"},
    282     {"google..", "com", "http://www.google.com/"},
    283     {".google", "com", "http://www.google.com/"},
    284     {"www.google", "com", "http://www.google.com/"},
    285     {"google.com", "com", "http://google.com/"},
    286     {"http://google", "com", "http://www.google.com/"},
    287     {"..google..", "com", "http://www.google.com/"},
    288     {"http://www.google", "com", "http://www.google.com/"},
    289     {"9999999999999999", "com", "http://www.9999999999999999.com/"},
    290     {"google/foo", "com", "http://www.google.com/foo"},
    291     {"google.com/foo", "com", "http://google.com/foo"},
    292     {"google/?foo=.com", "com", "http://www.google.com/?foo=.com"},
    293     {"www.google/?foo=www.", "com", "http://www.google.com/?foo=www."},
    294     {"google.com/?foo=.com", "com", "http://google.com/?foo=.com"},
    295     {"http://www.google.com", "com", "http://www.google.com/"},
    296     {"google:123", "com", "http://www.google.com:123/"},
    297     {"http://google:123", "com", "http://www.google.com:123/"},
    298   };
    299   for (size_t i = 0; i < arraysize(tld_cases); ++i) {
    300     fixup_case value = tld_cases[i];
    301     EXPECT_EQ(value.output, URLFixerUpper::FixupURL(value.input,
    302         value.desired_tld).possibly_invalid_spec());
    303   }
    304 }
    305 
    306 // Test different types of file inputs to URIFixerUpper::FixupURL. This
    307 // doesn't go into the nice array of fixups above since the file input
    308 // has to exist.
    309 TEST(URLFixerUpperTest, FixupFile) {
    310   // this "original" filename is the one we tweak to get all the variations
    311   FilePath dir;
    312   FilePath original;
    313   ASSERT_TRUE(PathService::Get(chrome::DIR_APP, &dir));
    314   ASSERT_TRUE(MakeTempFile(
    315       dir,
    316       FilePath(FILE_PATH_LITERAL("url fixer upper existing file.txt")),
    317       &original));
    318 
    319   // reference path
    320   GURL golden(net::FilePathToFileURL(original));
    321 
    322   // c:\foo\bar.txt -> file:///c:/foo/bar.txt (basic)
    323 #if defined(OS_WIN)
    324   GURL fixedup(URLFixerUpper::FixupURL(WideToUTF8(original.value()),
    325                                        std::string()));
    326 #elif defined(OS_POSIX)
    327   GURL fixedup(URLFixerUpper::FixupURL(original.value(), std::string()));
    328 #endif
    329   EXPECT_EQ(golden, fixedup);
    330 
    331   // TODO(port): Make some equivalent tests for posix.
    332 #if defined(OS_WIN)
    333   // c|/foo\bar.txt -> file:///c:/foo/bar.txt (pipe allowed instead of colon)
    334   std::string cur(WideToUTF8(original.value()));
    335   EXPECT_EQ(':', cur[1]);
    336   cur[1] = '|';
    337   EXPECT_EQ(golden, URLFixerUpper::FixupURL(cur, std::string()));
    338 
    339   fixup_case file_cases[] = {
    340     {"c:\\This%20is a non-existent file.txt", "",
    341      "file:///C:/This%2520is%20a%20non-existent%20file.txt"},
    342 
    343     // \\foo\bar.txt -> file://foo/bar.txt
    344     // UNC paths, this file won't exist, but since there are no escapes, it
    345     // should be returned just converted to a file: URL.
    346     {"\\\\SomeNonexistentHost\\foo\\bar.txt", "",
    347      "file://somenonexistenthost/foo/bar.txt"},
    348     // We do this strictly, like IE8, which only accepts this form using
    349     // backslashes and not forward ones.  Turning "//foo" into "http" matches
    350     // Firefox and IE, silly though it may seem (it falls out of adding "http"
    351     // as the default protocol if you haven't entered one).
    352     {"//SomeNonexistentHost\\foo/bar.txt", "",
    353      "http://somenonexistenthost/foo/bar.txt"},
    354     {"file:///C:/foo/bar", "", "file:///C:/foo/bar"},
    355 
    356     // Much of the work here comes from GURL's canonicalization stage.
    357     {"file://C:/foo/bar", "", "file:///C:/foo/bar"},
    358     {"file:c:", "", "file:///C:/"},
    359     {"file:c:WINDOWS", "", "file:///C:/WINDOWS"},
    360     {"file:c|Program Files", "", "file:///C:/Program%20Files"},
    361     {"file:/file", "", "file://file/"},
    362     {"file:////////c:\\foo", "", "file:///C:/foo"},
    363     {"file://server/folder/file", "", "file://server/folder/file"},
    364 
    365     // These are fixups we don't do, but could consider:
    366     //
    367     //   {"file:///foo:/bar", "", "file://foo/bar"},
    368     //   {"file:/\\/server\\folder/file", "", "file://server/folder/file"},
    369   };
    370 #elif defined(OS_POSIX)
    371 
    372 #if defined(OS_MACOSX)
    373 #define HOME "/Users/"
    374 #else
    375 #define HOME "/home/"
    376 #endif
    377   URLFixerUpper::home_directory_override = "/foo";
    378   fixup_case file_cases[] = {
    379     // File URLs go through GURL, which tries to escape intelligently.
    380     {"/This%20is a non-existent file.txt", "",
    381      "file:///This%2520is%20a%20non-existent%20file.txt"},
    382     // A plain "/" refers to the root.
    383     {"/", "",
    384      "file:///"},
    385 
    386     // These rely on the above home_directory_override.
    387     {"~", "",
    388      "file:///foo"},
    389     {"~/bar", "",
    390      "file:///foo/bar"},
    391 
    392     // References to other users' homedirs.
    393     {"~foo", "",
    394      "file://" HOME "foo"},
    395     {"~x/blah", "",
    396      "file://" HOME "x/blah"},
    397   };
    398 #endif
    399   for (size_t i = 0; i < arraysize(file_cases); i++) {
    400     EXPECT_EQ(file_cases[i].output, URLFixerUpper::FixupURL(file_cases[i].input,
    401         file_cases[i].desired_tld).possibly_invalid_spec());
    402   }
    403 
    404   EXPECT_TRUE(file_util::Delete(original, false));
    405 }
    406 
    407 TEST(URLFixerUpperTest, FixupRelativeFile) {
    408   FilePath full_path, dir;
    409   FilePath file_part(FILE_PATH_LITERAL("url_fixer_upper_existing_file.txt"));
    410   ASSERT_TRUE(PathService::Get(chrome::DIR_APP, &dir));
    411   ASSERT_TRUE(MakeTempFile(dir, file_part, &full_path));
    412   ASSERT_TRUE(file_util::AbsolutePath(&full_path));
    413 
    414   // make sure we pass through good URLs
    415   for (size_t i = 0; i < arraysize(fixup_cases); ++i) {
    416     fixup_case value = fixup_cases[i];
    417 #if defined(OS_WIN)
    418     FilePath input(UTF8ToWide(value.input));
    419 #elif defined(OS_POSIX)
    420     FilePath input(value.input);
    421 #endif
    422     EXPECT_EQ(value.output,
    423         URLFixerUpper::FixupRelativeFile(dir, input).possibly_invalid_spec());
    424   }
    425 
    426   // make sure the existing file got fixed-up to a file URL, and that there
    427   // are no backslashes
    428   EXPECT_TRUE(IsMatchingFileURL(URLFixerUpper::FixupRelativeFile(dir,
    429       file_part).possibly_invalid_spec(), full_path));
    430   EXPECT_TRUE(file_util::Delete(full_path, false));
    431 
    432   // create a filename we know doesn't exist and make sure it doesn't get
    433   // fixed up to a file URL
    434   FilePath nonexistent_file(
    435       FILE_PATH_LITERAL("url_fixer_upper_nonexistent_file.txt"));
    436   std::string fixedup(URLFixerUpper::FixupRelativeFile(dir,
    437       nonexistent_file).possibly_invalid_spec());
    438   EXPECT_NE(std::string("file:///"), fixedup.substr(0, 8));
    439   EXPECT_FALSE(IsMatchingFileURL(fixedup, nonexistent_file));
    440 
    441   // make a subdir to make sure relative paths with directories work, also
    442   // test spaces:
    443   // "app_dir\url fixer-upper dir\url fixer-upper existing file.txt"
    444   FilePath sub_dir(FILE_PATH_LITERAL("url fixer-upper dir"));
    445   FilePath sub_file(FILE_PATH_LITERAL("url fixer-upper existing file.txt"));
    446   FilePath new_dir = dir.Append(sub_dir);
    447   file_util::CreateDirectory(new_dir);
    448   ASSERT_TRUE(MakeTempFile(new_dir, sub_file, &full_path));
    449   ASSERT_TRUE(file_util::AbsolutePath(&full_path));
    450 
    451   // test file in the subdir
    452   FilePath relative_file = sub_dir.Append(sub_file);
    453   EXPECT_TRUE(IsMatchingFileURL(URLFixerUpper::FixupRelativeFile(dir,
    454       relative_file).possibly_invalid_spec(), full_path));
    455 
    456   // test file in the subdir with different slashes and escaping.
    457   FilePath::StringType relative_file_str = sub_dir.value() +
    458       FILE_PATH_LITERAL("/") + sub_file.value();
    459   ReplaceSubstringsAfterOffset(&relative_file_str, 0,
    460       FILE_PATH_LITERAL(" "), FILE_PATH_LITERAL("%20"));
    461   EXPECT_TRUE(IsMatchingFileURL(URLFixerUpper::FixupRelativeFile(dir,
    462       FilePath(relative_file_str)).possibly_invalid_spec(), full_path));
    463 
    464   // test relative directories and duplicate slashes
    465   // (should resolve to the same file as above)
    466   relative_file_str = sub_dir.value() + FILE_PATH_LITERAL("/../") +
    467       sub_dir.value() + FILE_PATH_LITERAL("///./") + sub_file.value();
    468   EXPECT_TRUE(IsMatchingFileURL(URLFixerUpper::FixupRelativeFile(dir,
    469       FilePath(relative_file_str)).possibly_invalid_spec(), full_path));
    470 
    471   // done with the subdir
    472   EXPECT_TRUE(file_util::Delete(full_path, false));
    473   EXPECT_TRUE(file_util::Delete(new_dir, true));
    474 }
    475