Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "extensions/common/url_pattern.h"
      6 
      7 #include "base/strings/string_number_conversions.h"
      8 #include "base/strings/string_piece.h"
      9 #include "base/strings/string_split.h"
     10 #include "base/strings/string_util.h"
     11 #include "content/public/common/url_constants.h"
     12 #include "extensions/common/constants.h"
     13 #include "url/gurl.h"
     14 #include "url/url_util.h"
     15 
// Special pattern string that Parse() accepts as "match every valid URL";
// GetAsString() also returns it for patterns that match all URLs.
const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
     17 
     18 namespace {
     19 
     20 // TODO(aa): What about more obscure schemes like data: and javascript: ?
     21 // Note: keep this array in sync with kValidSchemeMasks.
     22 const char* kValidSchemes[] = {
     23   chrome::kHttpScheme,
     24   chrome::kHttpsScheme,
     25   chrome::kFileScheme,
     26   chrome::kFtpScheme,
     27   chrome::kChromeUIScheme,
     28   extensions::kExtensionScheme,
     29   chrome::kFileSystemScheme,
     30 };
     31 
     32 const int kValidSchemeMasks[] = {
     33   URLPattern::SCHEME_HTTP,
     34   URLPattern::SCHEME_HTTPS,
     35   URLPattern::SCHEME_FILE,
     36   URLPattern::SCHEME_FTP,
     37   URLPattern::SCHEME_CHROMEUI,
     38   URLPattern::SCHEME_EXTENSION,
     39   URLPattern::SCHEME_FILESYSTEM,
     40 };
     41 
     42 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
     43                must_keep_these_arrays_in_sync);
     44 
// Human-readable text for each parse outcome. These strings are handed out by
// URLPattern::GetParseResultString().
const char kParseSuccess[] = "Success.";
const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator.";
const char kParseErrorInvalidScheme[] = "Invalid scheme.";
const char kParseErrorWrongSchemeType[] = "Wrong scheme type.";
const char kParseErrorEmptyHost[] = "Host can not be empty.";
const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard.";
const char kParseErrorEmptyPath[] = "Empty path.";
const char kParseErrorInvalidPort[] = "Invalid port.";

// Message explaining each URLPattern::ParseResult.
// The table is indexed directly by the ParseResult value, so the entries are
// presumably listed in the same order as the enum in url_pattern.h (not
// visible here -- verify when adding a value).
const char* const kParseResultMessages[] = {
  kParseSuccess,
  kParseErrorMissingSchemeSeparator,
  kParseErrorInvalidScheme,
  kParseErrorWrongSchemeType,
  kParseErrorEmptyHost,
  kParseErrorInvalidHostWildcard,
  kParseErrorEmptyPath,
  kParseErrorInvalidPort,
};

// Fails the build if a ParseResult is added without a message.
COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
               must_add_message_for_each_parse_result);
     68 
// Marks the start of the path component. Parse() searches for it to find
// where the host part of a pattern ends.
const char kPathSeparator[] = "/";
     70 
     71 bool IsStandardScheme(const std::string& scheme) {
     72   // "*" gets the same treatment as a standard scheme.
     73   if (scheme == "*")
     74     return true;
     75 
     76   return url_util::IsStandard(scheme.c_str(),
     77       url_parse::Component(0, static_cast<int>(scheme.length())));
     78 }
     79 
     80 bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
     81   if (port == "*")
     82     return true;
     83 
     84   // Only accept non-wildcard ports if the scheme uses ports.
     85   if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
     86       url_parse::PORT_UNSPECIFIED) {
     87     return false;
     88   }
     89 
     90   int parsed_port = url_parse::PORT_UNSPECIFIED;
     91   if (!base::StringToInt(port, &parsed_port))
     92     return false;
     93   return (parsed_port >= 0) && (parsed_port < 65536);
     94 }
     95 
     96 // Returns |path| with the trailing wildcard stripped if one existed.
     97 //
     98 // The functions that rely on this (OverlapsWith and Contains) are only
     99 // called for the patterns inside URLPatternSet. In those cases, we know that
    100 // the path will have only a single wildcard at the end. This makes figuring
    101 // out overlap much easier. It seems like there is probably a computer-sciency
    102 // way to solve the general case, but we don't need that yet.
    103 std::string StripTrailingWildcard(const std::string& path) {
    104   size_t wildcard_index = path.find('*');
    105   size_t path_last = path.size() - 1;
    106   DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last);
    107   return wildcard_index == path_last ? path.substr(0, path_last) : path;
    108 }
    109 
    110 }  // namespace
    111 
    112 URLPattern::URLPattern()
    113     : valid_schemes_(SCHEME_NONE),
    114       match_all_urls_(false),
    115       match_subdomains_(false),
    116       port_("*") {}
    117 
    118 URLPattern::URLPattern(int valid_schemes)
    119     : valid_schemes_(valid_schemes),
    120       match_all_urls_(false),
    121       match_subdomains_(false),
    122       port_("*") {}
    123 
    124 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
    125     // Strict error checking is used, because this constructor is only
    126     // appropriate when we know |pattern| is valid.
    127     : valid_schemes_(valid_schemes),
    128       match_all_urls_(false),
    129       match_subdomains_(false),
    130       port_("*") {
    131   if (PARSE_SUCCESS != Parse(pattern))
    132     NOTREACHED() << "URLPattern is invalid: " << pattern;
    133 }
    134 
    135 URLPattern::~URLPattern() {
    136 }
    137 
    138 bool URLPattern::operator<(const URLPattern& other) const {
    139   return GetAsString() < other.GetAsString();
    140 }
    141 
    142 bool URLPattern::operator==(const URLPattern& other) const {
    143   return GetAsString() == other.GetAsString();
    144 }
    145 
    146 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
    147   spec_.clear();
    148   SetMatchAllURLs(false);
    149   SetMatchSubdomains(false);
    150   SetPort("*");
    151 
    152   // Special case pattern to match every valid URL.
    153   if (pattern == kAllUrlsPattern) {
    154     SetMatchAllURLs(true);
    155     return PARSE_SUCCESS;
    156   }
    157 
    158   // Parse out the scheme.
    159   size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator);
    160   bool has_standard_scheme_separator = true;
    161 
    162   // Some urls also use ':' alone as the scheme separator.
    163   if (scheme_end_pos == std::string::npos) {
    164     scheme_end_pos = pattern.find(':');
    165     has_standard_scheme_separator = false;
    166   }
    167 
    168   if (scheme_end_pos == std::string::npos)
    169     return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
    170 
    171   if (!SetScheme(pattern.substr(0, scheme_end_pos)))
    172     return PARSE_ERROR_INVALID_SCHEME;
    173 
    174   bool standard_scheme = IsStandardScheme(scheme_);
    175   if (standard_scheme != has_standard_scheme_separator)
    176     return PARSE_ERROR_WRONG_SCHEME_SEPARATOR;
    177 
    178   // Advance past the scheme separator.
    179   scheme_end_pos +=
    180       (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1);
    181   if (scheme_end_pos >= pattern.size())
    182     return PARSE_ERROR_EMPTY_HOST;
    183 
    184   // Parse out the host and path.
    185   size_t host_start_pos = scheme_end_pos;
    186   size_t path_start_pos = 0;
    187 
    188   if (!standard_scheme) {
    189     path_start_pos = host_start_pos;
    190   } else if (scheme_ == chrome::kFileScheme) {
    191     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
    192     if (host_end_pos == std::string::npos) {
    193       // Allow hostname omission.
    194       // e.g. file://* is interpreted as file:///*,
    195       // file://foo* is interpreted as file:///foo*.
    196       path_start_pos = host_start_pos - 1;
    197     } else {
    198       // Ignore hostname if scheme is file://.
    199       // e.g. file://localhost/foo is equal to file:///foo.
    200       path_start_pos = host_end_pos;
    201     }
    202   } else {
    203     size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
    204 
    205     // Host is required.
    206     if (host_start_pos == host_end_pos)
    207       return PARSE_ERROR_EMPTY_HOST;
    208 
    209     if (host_end_pos == std::string::npos)
    210       return PARSE_ERROR_EMPTY_PATH;
    211 
    212     host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
    213 
    214     // The first component can optionally be '*' to match all subdomains.
    215     std::vector<std::string> host_components;
    216     base::SplitString(host_, '.', &host_components);
    217     if (host_components[0] == "*") {
    218       match_subdomains_ = true;
    219       host_components.erase(host_components.begin(),
    220                             host_components.begin() + 1);
    221     }
    222     host_ = JoinString(host_components, '.');
    223 
    224     path_start_pos = host_end_pos;
    225   }
    226 
    227   SetPath(pattern.substr(path_start_pos));
    228 
    229   size_t port_pos = host_.find(':');
    230   if (port_pos != std::string::npos) {
    231     if (!SetPort(host_.substr(port_pos + 1)))
    232       return PARSE_ERROR_INVALID_PORT;
    233     host_ = host_.substr(0, port_pos);
    234   }
    235 
    236   // No other '*' can occur in the host, though. This isn't necessary, but is
    237   // done as a convenience to developers who might otherwise be confused and
    238   // think '*' works as a glob in the host.
    239   if (host_.find('*') != std::string::npos)
    240     return PARSE_ERROR_INVALID_HOST_WILDCARD;
    241 
    242   return PARSE_SUCCESS;
    243 }
    244 
    245 void URLPattern::SetValidSchemes(int valid_schemes) {
    246   spec_.clear();
    247   valid_schemes_ = valid_schemes;
    248 }
    249 
    250 void URLPattern::SetHost(const std::string& host) {
    251   spec_.clear();
    252   host_ = host;
    253 }
    254 
    255 void URLPattern::SetMatchAllURLs(bool val) {
    256   spec_.clear();
    257   match_all_urls_ = val;
    258 
    259   if (val) {
    260     match_subdomains_ = true;
    261     scheme_ = "*";
    262     host_.clear();
    263     SetPath("/*");
    264   }
    265 }
    266 
    267 void URLPattern::SetMatchSubdomains(bool val) {
    268   spec_.clear();
    269   match_subdomains_ = val;
    270 }
    271 
    272 bool URLPattern::SetScheme(const std::string& scheme) {
    273   spec_.clear();
    274   scheme_ = scheme;
    275   if (scheme_ == "*") {
    276     valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
    277   } else if (!IsValidScheme(scheme_)) {
    278     return false;
    279   }
    280   return true;
    281 }
    282 
    283 bool URLPattern::IsValidScheme(const std::string& scheme) const {
    284   if (valid_schemes_ == SCHEME_ALL)
    285     return true;
    286 
    287   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
    288     if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i]))
    289       return true;
    290   }
    291 
    292   return false;
    293 }
    294 
    295 void URLPattern::SetPath(const std::string& path) {
    296   spec_.clear();
    297   path_ = path;
    298   path_escaped_ = path_;
    299   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
    300   ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
    301 }
    302 
    303 bool URLPattern::SetPort(const std::string& port) {
    304   spec_.clear();
    305   if (IsValidPortForScheme(scheme_, port)) {
    306     port_ = port;
    307     return true;
    308   }
    309   return false;
    310 }
    311 
    312 bool URLPattern::MatchesURL(const GURL& test) const {
    313   const GURL* test_url = &test;
    314   bool has_inner_url = test.inner_url() != NULL;
    315 
    316   if (has_inner_url) {
    317     if (!test.SchemeIsFileSystem())
    318       return false;  // The only nested URLs we handle are filesystem URLs.
    319     test_url = test.inner_url();
    320   }
    321 
    322   if (!MatchesScheme(test_url->scheme()))
    323     return false;
    324 
    325   if (match_all_urls_)
    326     return true;
    327 
    328   std::string path_for_request = test.PathForRequest();
    329   if (has_inner_url)
    330     path_for_request = test_url->path() + path_for_request;
    331 
    332   return MatchesSecurityOriginHelper(*test_url) &&
    333          MatchesPath(path_for_request);
    334 }
    335 
    336 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
    337   const GURL* test_url = &test;
    338   bool has_inner_url = test.inner_url() != NULL;
    339 
    340   if (has_inner_url) {
    341     if (!test.SchemeIsFileSystem())
    342       return false;  // The only nested URLs we handle are filesystem URLs.
    343     test_url = test.inner_url();
    344   }
    345 
    346   if (!MatchesScheme(test_url->scheme()))
    347     return false;
    348 
    349   if (match_all_urls_)
    350     return true;
    351 
    352   return MatchesSecurityOriginHelper(*test_url);
    353 }
    354 
    355 bool URLPattern::MatchesScheme(const std::string& test) const {
    356   if (!IsValidScheme(test))
    357     return false;
    358 
    359   return scheme_ == "*" || test == scheme_;
    360 }
    361 
    362 bool URLPattern::MatchesHost(const std::string& host) const {
    363   std::string test(chrome::kHttpScheme);
    364   test += content::kStandardSchemeSeparator;
    365   test += host;
    366   test += "/";
    367   return MatchesHost(GURL(test));
    368 }
    369 
    370 bool URLPattern::MatchesHost(const GURL& test) const {
    371   // If the hosts are exactly equal, we have a match.
    372   if (test.host() == host_)
    373     return true;
    374 
    375   // If we're matching subdomains, and we have no host in the match pattern,
    376   // that means that we're matching all hosts, which means we have a match no
    377   // matter what the test host is.
    378   if (match_subdomains_ && host_.empty())
    379     return true;
    380 
    381   // Otherwise, we can only match if our match pattern matches subdomains.
    382   if (!match_subdomains_)
    383     return false;
    384 
    385   // We don't do subdomain matching against IP addresses, so we can give up now
    386   // if the test host is an IP address.
    387   if (test.HostIsIPAddress())
    388     return false;
    389 
    390   // Check if the test host is a subdomain of our host.
    391   if (test.host().length() <= (host_.length() + 1))
    392     return false;
    393 
    394   if (test.host().compare(test.host().length() - host_.length(),
    395                           host_.length(), host_) != 0)
    396     return false;
    397 
    398   return test.host()[test.host().length() - host_.length() - 1] == '.';
    399 }
    400 
    401 bool URLPattern::MatchesPath(const std::string& test) const {
    402   // Make the behaviour of OverlapsWith consistent with MatchesURL, which is
    403   // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
    404   if (test + "/*" == path_escaped_)
    405     return true;
    406 
    407   return MatchPattern(test, path_escaped_);
    408 }
    409 
    410 const std::string& URLPattern::GetAsString() const {
    411   if (!spec_.empty())
    412     return spec_;
    413 
    414   if (match_all_urls_) {
    415     spec_ = kAllUrlsPattern;
    416     return spec_;
    417   }
    418 
    419   bool standard_scheme = IsStandardScheme(scheme_);
    420 
    421   std::string spec = scheme_ +
    422       (standard_scheme ? content::kStandardSchemeSeparator : ":");
    423 
    424   if (scheme_ != chrome::kFileScheme && standard_scheme) {
    425     if (match_subdomains_) {
    426       spec += "*";
    427       if (!host_.empty())
    428         spec += ".";
    429     }
    430 
    431     if (!host_.empty())
    432       spec += host_;
    433 
    434     if (port_ != "*") {
    435       spec += ":";
    436       spec += port_;
    437     }
    438   }
    439 
    440   if (!path_.empty())
    441     spec += path_;
    442 
    443   spec_ = spec;
    444   return spec_;
    445 }
    446 
    447 bool URLPattern::OverlapsWith(const URLPattern& other) const {
    448   if (match_all_urls() || other.match_all_urls())
    449     return true;
    450   return (MatchesAnyScheme(other.GetExplicitSchemes()) ||
    451           other.MatchesAnyScheme(GetExplicitSchemes()))
    452       && (MatchesHost(other.host()) || other.MatchesHost(host()))
    453       && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port()))
    454       && (MatchesPath(StripTrailingWildcard(other.path())) ||
    455           other.MatchesPath(StripTrailingWildcard(path())));
    456 }
    457 
    458 bool URLPattern::Contains(const URLPattern& other) const {
    459   if (match_all_urls())
    460     return true;
    461   return MatchesAllSchemes(other.GetExplicitSchemes())
    462       && MatchesHost(other.host())
    463       && MatchesPortPattern(other.port())
    464       && MatchesPath(StripTrailingWildcard(other.path()));
    465 }
    466 
    467 bool URLPattern::MatchesAnyScheme(
    468     const std::vector<std::string>& schemes) const {
    469   for (std::vector<std::string>::const_iterator i = schemes.begin();
    470        i != schemes.end(); ++i) {
    471     if (MatchesScheme(*i))
    472       return true;
    473   }
    474 
    475   return false;
    476 }
    477 
    478 bool URLPattern::MatchesAllSchemes(
    479     const std::vector<std::string>& schemes) const {
    480   for (std::vector<std::string>::const_iterator i = schemes.begin();
    481        i != schemes.end(); ++i) {
    482     if (!MatchesScheme(*i))
    483       return false;
    484   }
    485 
    486   return true;
    487 }
    488 
    489 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
    490   // Ignore hostname if scheme is file://.
    491   if (scheme_ != chrome::kFileScheme && !MatchesHost(test))
    492     return false;
    493 
    494   if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort())))
    495     return false;
    496 
    497   return true;
    498 }
    499 
    500 bool URLPattern::MatchesPortPattern(const std::string& port) const {
    501   return port_ == "*" || port_ == port;
    502 }
    503 
    504 std::vector<std::string> URLPattern::GetExplicitSchemes() const {
    505   std::vector<std::string> result;
    506 
    507   if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) {
    508     result.push_back(scheme_);
    509     return result;
    510   }
    511 
    512   for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
    513     if (MatchesScheme(kValidSchemes[i])) {
    514       result.push_back(kValidSchemes[i]);
    515     }
    516   }
    517 
    518   return result;
    519 }
    520 
    521 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const {
    522   std::vector<std::string> explicit_schemes = GetExplicitSchemes();
    523   std::vector<URLPattern> result;
    524 
    525   for (std::vector<std::string>::const_iterator i = explicit_schemes.begin();
    526        i != explicit_schemes.end(); ++i) {
    527     URLPattern temp = *this;
    528     temp.SetScheme(*i);
    529     temp.SetMatchAllURLs(false);
    530     result.push_back(temp);
    531   }
    532 
    533   return result;
    534 }
    535 
    536 // static
    537 const char* URLPattern::GetParseResultString(
    538     URLPattern::ParseResult parse_result) {
    539   return kParseResultMessages[parse_result];
    540 }
    541