1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "extensions/common/url_pattern.h" 6 7 #include "base/strings/string_number_conversions.h" 8 #include "base/strings/string_piece.h" 9 #include "base/strings/string_split.h" 10 #include "base/strings/string_util.h" 11 #include "content/public/common/url_constants.h" 12 #include "extensions/common/constants.h" 13 #include "url/gurl.h" 14 #include "url/url_util.h" 15 16 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; 17 18 namespace { 19 20 // TODO(aa): What about more obscure schemes like data: and javascript: ? 21 // Note: keep this array in sync with kValidSchemeMasks. 22 const char* kValidSchemes[] = { 23 chrome::kHttpScheme, 24 chrome::kHttpsScheme, 25 chrome::kFileScheme, 26 chrome::kFtpScheme, 27 chrome::kChromeUIScheme, 28 extensions::kExtensionScheme, 29 chrome::kFileSystemScheme, 30 }; 31 32 const int kValidSchemeMasks[] = { 33 URLPattern::SCHEME_HTTP, 34 URLPattern::SCHEME_HTTPS, 35 URLPattern::SCHEME_FILE, 36 URLPattern::SCHEME_FTP, 37 URLPattern::SCHEME_CHROMEUI, 38 URLPattern::SCHEME_EXTENSION, 39 URLPattern::SCHEME_FILESYSTEM, 40 }; 41 42 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), 43 must_keep_these_arrays_in_sync); 44 45 const char kParseSuccess[] = "Success."; 46 const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; 47 const char kParseErrorInvalidScheme[] = "Invalid scheme."; 48 const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; 49 const char kParseErrorEmptyHost[] = "Host can not be empty."; 50 const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; 51 const char kParseErrorEmptyPath[] = "Empty path."; 52 const char kParseErrorInvalidPort[] = "Invalid port."; 53 54 // Message explaining each URLPattern::ParseResult. 55 const char* const kParseResultMessages[] = { 56 kParseSuccess, 57 kParseErrorMissingSchemeSeparator, 58 kParseErrorInvalidScheme, 59 kParseErrorWrongSchemeType, 60 kParseErrorEmptyHost, 61 kParseErrorInvalidHostWildcard, 62 kParseErrorEmptyPath, 63 kParseErrorInvalidPort, 64 }; 65 66 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), 67 must_add_message_for_each_parse_result); 68 69 const char kPathSeparator[] = "/"; 70 71 bool IsStandardScheme(const std::string& scheme) { 72 // "*" gets the same treatment as a standard scheme. 73 if (scheme == "*") 74 return true; 75 76 return url_util::IsStandard(scheme.c_str(), 77 url_parse::Component(0, static_cast<int>(scheme.length()))); 78 } 79 80 bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { 81 if (port == "*") 82 return true; 83 84 // Only accept non-wildcard ports if the scheme uses ports. 85 if (url_canon::DefaultPortForScheme(scheme.c_str(), scheme.length()) == 86 url_parse::PORT_UNSPECIFIED) { 87 return false; 88 } 89 90 int parsed_port = url_parse::PORT_UNSPECIFIED; 91 if (!base::StringToInt(port, &parsed_port)) 92 return false; 93 return (parsed_port >= 0) && (parsed_port < 65536); 94 } 95 96 // Returns |path| with the trailing wildcard stripped if one existed. 97 // 98 // The functions that rely on this (OverlapsWith and Contains) are only 99 // called for the patterns inside URLPatternSet. In those cases, we know that 100 // the path will have only a single wildcard at the end. This makes figuring 101 // out overlap much easier. It seems like there is probably a computer-sciency 102 // way to solve the general case, but we don't need that yet. 103 std::string StripTrailingWildcard(const std::string& path) { 104 size_t wildcard_index = path.find('*'); 105 size_t path_last = path.size() - 1; 106 DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last); 107 return wildcard_index == path_last ? path.substr(0, path_last) : path; 108 } 109 110 } // namespace 111 112 URLPattern::URLPattern() 113 : valid_schemes_(SCHEME_NONE), 114 match_all_urls_(false), 115 match_subdomains_(false), 116 port_("*") {} 117 118 URLPattern::URLPattern(int valid_schemes) 119 : valid_schemes_(valid_schemes), 120 match_all_urls_(false), 121 match_subdomains_(false), 122 port_("*") {} 123 124 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) 125 // Strict error checking is used, because this constructor is only 126 // appropriate when we know |pattern| is valid. 127 : valid_schemes_(valid_schemes), 128 match_all_urls_(false), 129 match_subdomains_(false), 130 port_("*") { 131 if (PARSE_SUCCESS != Parse(pattern)) 132 NOTREACHED() << "URLPattern is invalid: " << pattern; 133 } 134 135 URLPattern::~URLPattern() { 136 } 137 138 bool URLPattern::operator<(const URLPattern& other) const { 139 return GetAsString() < other.GetAsString(); 140 } 141 142 bool URLPattern::operator==(const URLPattern& other) const { 143 return GetAsString() == other.GetAsString(); 144 } 145 146 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { 147 spec_.clear(); 148 SetMatchAllURLs(false); 149 SetMatchSubdomains(false); 150 SetPort("*"); 151 152 // Special case pattern to match every valid URL. 153 if (pattern == kAllUrlsPattern) { 154 SetMatchAllURLs(true); 155 return PARSE_SUCCESS; 156 } 157 158 // Parse out the scheme. 159 size_t scheme_end_pos = pattern.find(content::kStandardSchemeSeparator); 160 bool has_standard_scheme_separator = true; 161 162 // Some urls also use ':' alone as the scheme separator. 163 if (scheme_end_pos == std::string::npos) { 164 scheme_end_pos = pattern.find(':'); 165 has_standard_scheme_separator = false; 166 } 167 168 if (scheme_end_pos == std::string::npos) 169 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; 170 171 if (!SetScheme(pattern.substr(0, scheme_end_pos))) 172 return PARSE_ERROR_INVALID_SCHEME; 173 174 bool standard_scheme = IsStandardScheme(scheme_); 175 if (standard_scheme != has_standard_scheme_separator) 176 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; 177 178 // Advance past the scheme separator. 179 scheme_end_pos += 180 (standard_scheme ? strlen(content::kStandardSchemeSeparator) : 1); 181 if (scheme_end_pos >= pattern.size()) 182 return PARSE_ERROR_EMPTY_HOST; 183 184 // Parse out the host and path. 185 size_t host_start_pos = scheme_end_pos; 186 size_t path_start_pos = 0; 187 188 if (!standard_scheme) { 189 path_start_pos = host_start_pos; 190 } else if (scheme_ == chrome::kFileScheme) { 191 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 192 if (host_end_pos == std::string::npos) { 193 // Allow hostname omission. 194 // e.g. file://* is interpreted as file:///*, 195 // file://foo* is interpreted as file:///foo*. 196 path_start_pos = host_start_pos - 1; 197 } else { 198 // Ignore hostname if scheme is file://. 199 // e.g. file://localhost/foo is equal to file:///foo. 200 path_start_pos = host_end_pos; 201 } 202 } else { 203 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 204 205 // Host is required. 206 if (host_start_pos == host_end_pos) 207 return PARSE_ERROR_EMPTY_HOST; 208 209 if (host_end_pos == std::string::npos) 210 return PARSE_ERROR_EMPTY_PATH; 211 212 host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); 213 214 // The first component can optionally be '*' to match all subdomains. 215 std::vector<std::string> host_components; 216 base::SplitString(host_, '.', &host_components); 217 if (host_components[0] == "*") { 218 match_subdomains_ = true; 219 host_components.erase(host_components.begin(), 220 host_components.begin() + 1); 221 } 222 host_ = JoinString(host_components, '.'); 223 224 path_start_pos = host_end_pos; 225 } 226 227 SetPath(pattern.substr(path_start_pos)); 228 229 size_t port_pos = host_.find(':'); 230 if (port_pos != std::string::npos) { 231 if (!SetPort(host_.substr(port_pos + 1))) 232 return PARSE_ERROR_INVALID_PORT; 233 host_ = host_.substr(0, port_pos); 234 } 235 236 // No other '*' can occur in the host, though. This isn't necessary, but is 237 // done as a convenience to developers who might otherwise be confused and 238 // think '*' works as a glob in the host. 239 if (host_.find('*') != std::string::npos) 240 return PARSE_ERROR_INVALID_HOST_WILDCARD; 241 242 return PARSE_SUCCESS; 243 } 244 245 void URLPattern::SetValidSchemes(int valid_schemes) { 246 spec_.clear(); 247 valid_schemes_ = valid_schemes; 248 } 249 250 void URLPattern::SetHost(const std::string& host) { 251 spec_.clear(); 252 host_ = host; 253 } 254 255 void URLPattern::SetMatchAllURLs(bool val) { 256 spec_.clear(); 257 match_all_urls_ = val; 258 259 if (val) { 260 match_subdomains_ = true; 261 scheme_ = "*"; 262 host_.clear(); 263 SetPath("/*"); 264 } 265 } 266 267 void URLPattern::SetMatchSubdomains(bool val) { 268 spec_.clear(); 269 match_subdomains_ = val; 270 } 271 272 bool URLPattern::SetScheme(const std::string& scheme) { 273 spec_.clear(); 274 scheme_ = scheme; 275 if (scheme_ == "*") { 276 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); 277 } else if (!IsValidScheme(scheme_)) { 278 return false; 279 } 280 return true; 281 } 282 283 bool URLPattern::IsValidScheme(const std::string& scheme) const { 284 if (valid_schemes_ == SCHEME_ALL) 285 return true; 286 287 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 288 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) 289 return true; 290 } 291 292 return false; 293 } 294 295 void URLPattern::SetPath(const std::string& path) { 296 spec_.clear(); 297 path_ = path; 298 path_escaped_ = path_; 299 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); 300 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); 301 } 302 303 bool URLPattern::SetPort(const std::string& port) { 304 spec_.clear(); 305 if (IsValidPortForScheme(scheme_, port)) { 306 port_ = port; 307 return true; 308 } 309 return false; 310 } 311 312 bool URLPattern::MatchesURL(const GURL& test) const { 313 const GURL* test_url = &test; 314 bool has_inner_url = test.inner_url() != NULL; 315 316 if (has_inner_url) { 317 if (!test.SchemeIsFileSystem()) 318 return false; // The only nested URLs we handle are filesystem URLs. 319 test_url = test.inner_url(); 320 } 321 322 if (!MatchesScheme(test_url->scheme())) 323 return false; 324 325 if (match_all_urls_) 326 return true; 327 328 std::string path_for_request = test.PathForRequest(); 329 if (has_inner_url) 330 path_for_request = test_url->path() + path_for_request; 331 332 return MatchesSecurityOriginHelper(*test_url) && 333 MatchesPath(path_for_request); 334 } 335 336 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { 337 const GURL* test_url = &test; 338 bool has_inner_url = test.inner_url() != NULL; 339 340 if (has_inner_url) { 341 if (!test.SchemeIsFileSystem()) 342 return false; // The only nested URLs we handle are filesystem URLs. 343 test_url = test.inner_url(); 344 } 345 346 if (!MatchesScheme(test_url->scheme())) 347 return false; 348 349 if (match_all_urls_) 350 return true; 351 352 return MatchesSecurityOriginHelper(*test_url); 353 } 354 355 bool URLPattern::MatchesScheme(const std::string& test) const { 356 if (!IsValidScheme(test)) 357 return false; 358 359 return scheme_ == "*" || test == scheme_; 360 } 361 362 bool URLPattern::MatchesHost(const std::string& host) const { 363 std::string test(chrome::kHttpScheme); 364 test += content::kStandardSchemeSeparator; 365 test += host; 366 test += "/"; 367 return MatchesHost(GURL(test)); 368 } 369 370 bool URLPattern::MatchesHost(const GURL& test) const { 371 // If the hosts are exactly equal, we have a match. 372 if (test.host() == host_) 373 return true; 374 375 // If we're matching subdomains, and we have no host in the match pattern, 376 // that means that we're matching all hosts, which means we have a match no 377 // matter what the test host is. 378 if (match_subdomains_ && host_.empty()) 379 return true; 380 381 // Otherwise, we can only match if our match pattern matches subdomains. 382 if (!match_subdomains_) 383 return false; 384 385 // We don't do subdomain matching against IP addresses, so we can give up now 386 // if the test host is an IP address. 387 if (test.HostIsIPAddress()) 388 return false; 389 390 // Check if the test host is a subdomain of our host. 391 if (test.host().length() <= (host_.length() + 1)) 392 return false; 393 394 if (test.host().compare(test.host().length() - host_.length(), 395 host_.length(), host_) != 0) 396 return false; 397 398 return test.host()[test.host().length() - host_.length() - 1] == '.'; 399 } 400 401 bool URLPattern::MatchesPath(const std::string& test) const { 402 // Make the behaviour of OverlapsWith consistent with MatchesURL, which is 403 // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'. 404 if (test + "/*" == path_escaped_) 405 return true; 406 407 return MatchPattern(test, path_escaped_); 408 } 409 410 const std::string& URLPattern::GetAsString() const { 411 if (!spec_.empty()) 412 return spec_; 413 414 if (match_all_urls_) { 415 spec_ = kAllUrlsPattern; 416 return spec_; 417 } 418 419 bool standard_scheme = IsStandardScheme(scheme_); 420 421 std::string spec = scheme_ + 422 (standard_scheme ? content::kStandardSchemeSeparator : ":"); 423 424 if (scheme_ != chrome::kFileScheme && standard_scheme) { 425 if (match_subdomains_) { 426 spec += "*"; 427 if (!host_.empty()) 428 spec += "."; 429 } 430 431 if (!host_.empty()) 432 spec += host_; 433 434 if (port_ != "*") { 435 spec += ":"; 436 spec += port_; 437 } 438 } 439 440 if (!path_.empty()) 441 spec += path_; 442 443 spec_ = spec; 444 return spec_; 445 } 446 447 bool URLPattern::OverlapsWith(const URLPattern& other) const { 448 if (match_all_urls() || other.match_all_urls()) 449 return true; 450 return (MatchesAnyScheme(other.GetExplicitSchemes()) || 451 other.MatchesAnyScheme(GetExplicitSchemes())) 452 && (MatchesHost(other.host()) || other.MatchesHost(host())) 453 && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port())) 454 && (MatchesPath(StripTrailingWildcard(other.path())) || 455 other.MatchesPath(StripTrailingWildcard(path()))); 456 } 457 458 bool URLPattern::Contains(const URLPattern& other) const { 459 if (match_all_urls()) 460 return true; 461 return MatchesAllSchemes(other.GetExplicitSchemes()) 462 && MatchesHost(other.host()) 463 && MatchesPortPattern(other.port()) 464 && MatchesPath(StripTrailingWildcard(other.path())); 465 } 466 467 bool URLPattern::MatchesAnyScheme( 468 const std::vector<std::string>& schemes) const { 469 for (std::vector<std::string>::const_iterator i = schemes.begin(); 470 i != schemes.end(); ++i) { 471 if (MatchesScheme(*i)) 472 return true; 473 } 474 475 return false; 476 } 477 478 bool URLPattern::MatchesAllSchemes( 479 const std::vector<std::string>& schemes) const { 480 for (std::vector<std::string>::const_iterator i = schemes.begin(); 481 i != schemes.end(); ++i) { 482 if (!MatchesScheme(*i)) 483 return false; 484 } 485 486 return true; 487 } 488 489 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { 490 // Ignore hostname if scheme is file://. 491 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) 492 return false; 493 494 if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort()))) 495 return false; 496 497 return true; 498 } 499 500 bool URLPattern::MatchesPortPattern(const std::string& port) const { 501 return port_ == "*" || port_ == port; 502 } 503 504 std::vector<std::string> URLPattern::GetExplicitSchemes() const { 505 std::vector<std::string> result; 506 507 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { 508 result.push_back(scheme_); 509 return result; 510 } 511 512 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 513 if (MatchesScheme(kValidSchemes[i])) { 514 result.push_back(kValidSchemes[i]); 515 } 516 } 517 518 return result; 519 } 520 521 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { 522 std::vector<std::string> explicit_schemes = GetExplicitSchemes(); 523 std::vector<URLPattern> result; 524 525 for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); 526 i != explicit_schemes.end(); ++i) { 527 URLPattern temp = *this; 528 temp.SetScheme(*i); 529 temp.SetMatchAllURLs(false); 530 result.push_back(temp); 531 } 532 533 return result; 534 } 535 536 // static 537 const char* URLPattern::GetParseResultString( 538 URLPattern::ParseResult parse_result) { 539 return kParseResultMessages[parse_result]; 540 } 541