1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "extensions/common/url_pattern.h" 6 7 #include "base/strings/string_number_conversions.h" 8 #include "base/strings/string_piece.h" 9 #include "base/strings/string_split.h" 10 #include "base/strings/string_util.h" 11 #include "content/public/common/url_constants.h" 12 #include "extensions/common/constants.h" 13 #include "url/gurl.h" 14 #include "url/url_util.h" 15 16 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; 17 18 namespace { 19 20 // TODO(aa): What about more obscure schemes like data: and javascript: ? 21 // Note: keep this array in sync with kValidSchemeMasks. 22 const char* kValidSchemes[] = { 23 url::kHttpScheme, 24 url::kHttpsScheme, 25 url::kFileScheme, 26 url::kFtpScheme, 27 content::kChromeUIScheme, 28 extensions::kExtensionScheme, 29 url::kFileSystemScheme, 30 }; 31 32 const int kValidSchemeMasks[] = { 33 URLPattern::SCHEME_HTTP, 34 URLPattern::SCHEME_HTTPS, 35 URLPattern::SCHEME_FILE, 36 URLPattern::SCHEME_FTP, 37 URLPattern::SCHEME_CHROMEUI, 38 URLPattern::SCHEME_EXTENSION, 39 URLPattern::SCHEME_FILESYSTEM, 40 }; 41 42 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), 43 must_keep_these_arrays_in_sync); 44 45 const char kParseSuccess[] = "Success."; 46 const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; 47 const char kParseErrorInvalidScheme[] = "Invalid scheme."; 48 const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; 49 const char kParseErrorEmptyHost[] = "Host can not be empty."; 50 const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; 51 const char kParseErrorEmptyPath[] = "Empty path."; 52 const char kParseErrorInvalidPort[] = "Invalid port."; 53 const char kParseErrorInvalidHost[] = "Invalid host."; 54 55 // Message explaining each URLPattern::ParseResult. 56 const char* const kParseResultMessages[] = { 57 kParseSuccess, 58 kParseErrorMissingSchemeSeparator, 59 kParseErrorInvalidScheme, 60 kParseErrorWrongSchemeType, 61 kParseErrorEmptyHost, 62 kParseErrorInvalidHostWildcard, 63 kParseErrorEmptyPath, 64 kParseErrorInvalidPort, 65 kParseErrorInvalidHost, 66 }; 67 68 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), 69 must_add_message_for_each_parse_result); 70 71 const char kPathSeparator[] = "/"; 72 73 bool IsStandardScheme(const std::string& scheme) { 74 // "*" gets the same treatment as a standard scheme. 75 if (scheme == "*") 76 return true; 77 78 return url::IsStandard(scheme.c_str(), 79 url::Component(0, static_cast<int>(scheme.length()))); 80 } 81 82 bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { 83 if (port == "*") 84 return true; 85 86 // Only accept non-wildcard ports if the scheme uses ports. 87 if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) == 88 url::PORT_UNSPECIFIED) { 89 return false; 90 } 91 92 int parsed_port = url::PORT_UNSPECIFIED; 93 if (!base::StringToInt(port, &parsed_port)) 94 return false; 95 return (parsed_port >= 0) && (parsed_port < 65536); 96 } 97 98 // Returns |path| with the trailing wildcard stripped if one existed. 99 // 100 // The functions that rely on this (OverlapsWith and Contains) are only 101 // called for the patterns inside URLPatternSet. In those cases, we know that 102 // the path will have only a single wildcard at the end. This makes figuring 103 // out overlap much easier. It seems like there is probably a computer-sciency 104 // way to solve the general case, but we don't need that yet. 105 std::string StripTrailingWildcard(const std::string& path) { 106 size_t wildcard_index = path.find('*'); 107 size_t path_last = path.size() - 1; 108 DCHECK(wildcard_index == std::string::npos || wildcard_index == path_last); 109 return wildcard_index == path_last ? path.substr(0, path_last) : path; 110 } 111 112 } // namespace 113 114 // static 115 bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) { 116 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 117 if (scheme == kValidSchemes[i]) 118 return true; 119 } 120 return false; 121 } 122 123 URLPattern::URLPattern() 124 : valid_schemes_(SCHEME_NONE), 125 match_all_urls_(false), 126 match_subdomains_(false), 127 port_("*") {} 128 129 URLPattern::URLPattern(int valid_schemes) 130 : valid_schemes_(valid_schemes), 131 match_all_urls_(false), 132 match_subdomains_(false), 133 port_("*") {} 134 135 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) 136 // Strict error checking is used, because this constructor is only 137 // appropriate when we know |pattern| is valid. 138 : valid_schemes_(valid_schemes), 139 match_all_urls_(false), 140 match_subdomains_(false), 141 port_("*") { 142 ParseResult result = Parse(pattern); 143 if (PARSE_SUCCESS != result) 144 NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result; 145 } 146 147 URLPattern::~URLPattern() { 148 } 149 150 bool URLPattern::operator<(const URLPattern& other) const { 151 return GetAsString() < other.GetAsString(); 152 } 153 154 bool URLPattern::operator>(const URLPattern& other) const { 155 return GetAsString() > other.GetAsString(); 156 } 157 158 bool URLPattern::operator==(const URLPattern& other) const { 159 return GetAsString() == other.GetAsString(); 160 } 161 162 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { 163 spec_.clear(); 164 SetMatchAllURLs(false); 165 SetMatchSubdomains(false); 166 SetPort("*"); 167 168 // Special case pattern to match every valid URL. 169 if (pattern == kAllUrlsPattern) { 170 SetMatchAllURLs(true); 171 return PARSE_SUCCESS; 172 } 173 174 // Parse out the scheme. 175 size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator); 176 bool has_standard_scheme_separator = true; 177 178 // Some urls also use ':' alone as the scheme separator. 179 if (scheme_end_pos == std::string::npos) { 180 scheme_end_pos = pattern.find(':'); 181 has_standard_scheme_separator = false; 182 } 183 184 if (scheme_end_pos == std::string::npos) 185 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; 186 187 if (!SetScheme(pattern.substr(0, scheme_end_pos))) 188 return PARSE_ERROR_INVALID_SCHEME; 189 190 bool standard_scheme = IsStandardScheme(scheme_); 191 if (standard_scheme != has_standard_scheme_separator) 192 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; 193 194 // Advance past the scheme separator. 195 scheme_end_pos += 196 (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1); 197 if (scheme_end_pos >= pattern.size()) 198 return PARSE_ERROR_EMPTY_HOST; 199 200 // Parse out the host and path. 201 size_t host_start_pos = scheme_end_pos; 202 size_t path_start_pos = 0; 203 204 if (!standard_scheme) { 205 path_start_pos = host_start_pos; 206 } else if (scheme_ == url::kFileScheme) { 207 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 208 if (host_end_pos == std::string::npos) { 209 // Allow hostname omission. 210 // e.g. file://* is interpreted as file:///*, 211 // file://foo* is interpreted as file:///foo*. 212 path_start_pos = host_start_pos - 1; 213 } else { 214 // Ignore hostname if scheme is file://. 215 // e.g. file://localhost/foo is equal to file:///foo. 216 path_start_pos = host_end_pos; 217 } 218 } else { 219 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 220 221 // Host is required. 222 if (host_start_pos == host_end_pos) 223 return PARSE_ERROR_EMPTY_HOST; 224 225 if (host_end_pos == std::string::npos) 226 return PARSE_ERROR_EMPTY_PATH; 227 228 host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); 229 230 // The first component can optionally be '*' to match all subdomains. 231 std::vector<std::string> host_components; 232 base::SplitString(host_, '.', &host_components); 233 if (host_components[0] == "*") { 234 match_subdomains_ = true; 235 host_components.erase(host_components.begin(), 236 host_components.begin() + 1); 237 } 238 host_ = JoinString(host_components, '.'); 239 240 path_start_pos = host_end_pos; 241 } 242 243 SetPath(pattern.substr(path_start_pos)); 244 245 size_t port_pos = host_.find(':'); 246 if (port_pos != std::string::npos) { 247 if (!SetPort(host_.substr(port_pos + 1))) 248 return PARSE_ERROR_INVALID_PORT; 249 host_ = host_.substr(0, port_pos); 250 } 251 252 // No other '*' can occur in the host, though. This isn't necessary, but is 253 // done as a convenience to developers who might otherwise be confused and 254 // think '*' works as a glob in the host. 255 if (host_.find('*') != std::string::npos) 256 return PARSE_ERROR_INVALID_HOST_WILDCARD; 257 258 // Null characters are not allowed in hosts. 259 if (host_.find('\0') != std::string::npos) 260 return PARSE_ERROR_INVALID_HOST; 261 262 return PARSE_SUCCESS; 263 } 264 265 void URLPattern::SetValidSchemes(int valid_schemes) { 266 spec_.clear(); 267 valid_schemes_ = valid_schemes; 268 } 269 270 void URLPattern::SetHost(const std::string& host) { 271 spec_.clear(); 272 host_ = host; 273 } 274 275 void URLPattern::SetMatchAllURLs(bool val) { 276 spec_.clear(); 277 match_all_urls_ = val; 278 279 if (val) { 280 match_subdomains_ = true; 281 scheme_ = "*"; 282 host_.clear(); 283 SetPath("/*"); 284 } 285 } 286 287 void URLPattern::SetMatchSubdomains(bool val) { 288 spec_.clear(); 289 match_subdomains_ = val; 290 } 291 292 bool URLPattern::SetScheme(const std::string& scheme) { 293 spec_.clear(); 294 scheme_ = scheme; 295 if (scheme_ == "*") { 296 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); 297 } else if (!IsValidScheme(scheme_)) { 298 return false; 299 } 300 return true; 301 } 302 303 bool URLPattern::IsValidScheme(const std::string& scheme) const { 304 if (valid_schemes_ == SCHEME_ALL) 305 return true; 306 307 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 308 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) 309 return true; 310 } 311 312 return false; 313 } 314 315 void URLPattern::SetPath(const std::string& path) { 316 spec_.clear(); 317 path_ = path; 318 path_escaped_ = path_; 319 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); 320 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); 321 } 322 323 bool URLPattern::SetPort(const std::string& port) { 324 spec_.clear(); 325 if (IsValidPortForScheme(scheme_, port)) { 326 port_ = port; 327 return true; 328 } 329 return false; 330 } 331 332 bool URLPattern::MatchesURL(const GURL& test) const { 333 const GURL* test_url = &test; 334 bool has_inner_url = test.inner_url() != NULL; 335 336 if (has_inner_url) { 337 if (!test.SchemeIsFileSystem()) 338 return false; // The only nested URLs we handle are filesystem URLs. 339 test_url = test.inner_url(); 340 } 341 342 if (!MatchesScheme(test_url->scheme())) 343 return false; 344 345 if (match_all_urls_) 346 return true; 347 348 std::string path_for_request = test.PathForRequest(); 349 if (has_inner_url) 350 path_for_request = test_url->path() + path_for_request; 351 352 return MatchesSecurityOriginHelper(*test_url) && 353 MatchesPath(path_for_request); 354 } 355 356 bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { 357 const GURL* test_url = &test; 358 bool has_inner_url = test.inner_url() != NULL; 359 360 if (has_inner_url) { 361 if (!test.SchemeIsFileSystem()) 362 return false; // The only nested URLs we handle are filesystem URLs. 363 test_url = test.inner_url(); 364 } 365 366 if (!MatchesScheme(test_url->scheme())) 367 return false; 368 369 if (match_all_urls_) 370 return true; 371 372 return MatchesSecurityOriginHelper(*test_url); 373 } 374 375 bool URLPattern::MatchesScheme(const std::string& test) const { 376 if (!IsValidScheme(test)) 377 return false; 378 379 return scheme_ == "*" || test == scheme_; 380 } 381 382 bool URLPattern::MatchesHost(const std::string& host) const { 383 std::string test(url::kHttpScheme); 384 test += url::kStandardSchemeSeparator; 385 test += host; 386 test += "/"; 387 return MatchesHost(GURL(test)); 388 } 389 390 bool URLPattern::MatchesHost(const GURL& test) const { 391 // If the hosts are exactly equal, we have a match. 392 if (test.host() == host_) 393 return true; 394 395 // If we're matching subdomains, and we have no host in the match pattern, 396 // that means that we're matching all hosts, which means we have a match no 397 // matter what the test host is. 398 if (match_subdomains_ && host_.empty()) 399 return true; 400 401 // Otherwise, we can only match if our match pattern matches subdomains. 402 if (!match_subdomains_) 403 return false; 404 405 // We don't do subdomain matching against IP addresses, so we can give up now 406 // if the test host is an IP address. 407 if (test.HostIsIPAddress()) 408 return false; 409 410 // Check if the test host is a subdomain of our host. 411 if (test.host().length() <= (host_.length() + 1)) 412 return false; 413 414 if (test.host().compare(test.host().length() - host_.length(), 415 host_.length(), host_) != 0) 416 return false; 417 418 return test.host()[test.host().length() - host_.length() - 1] == '.'; 419 } 420 421 bool URLPattern::MatchesPath(const std::string& test) const { 422 // Make the behaviour of OverlapsWith consistent with MatchesURL, which is 423 // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'. 424 if (test + "/*" == path_escaped_) 425 return true; 426 427 return MatchPattern(test, path_escaped_); 428 } 429 430 const std::string& URLPattern::GetAsString() const { 431 if (!spec_.empty()) 432 return spec_; 433 434 if (match_all_urls_) { 435 spec_ = kAllUrlsPattern; 436 return spec_; 437 } 438 439 bool standard_scheme = IsStandardScheme(scheme_); 440 441 std::string spec = scheme_ + 442 (standard_scheme ? url::kStandardSchemeSeparator : ":"); 443 444 if (scheme_ != url::kFileScheme && standard_scheme) { 445 if (match_subdomains_) { 446 spec += "*"; 447 if (!host_.empty()) 448 spec += "."; 449 } 450 451 if (!host_.empty()) 452 spec += host_; 453 454 if (port_ != "*") { 455 spec += ":"; 456 spec += port_; 457 } 458 } 459 460 if (!path_.empty()) 461 spec += path_; 462 463 spec_ = spec; 464 return spec_; 465 } 466 467 bool URLPattern::OverlapsWith(const URLPattern& other) const { 468 if (match_all_urls() || other.match_all_urls()) 469 return true; 470 return (MatchesAnyScheme(other.GetExplicitSchemes()) || 471 other.MatchesAnyScheme(GetExplicitSchemes())) 472 && (MatchesHost(other.host()) || other.MatchesHost(host())) 473 && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port())) 474 && (MatchesPath(StripTrailingWildcard(other.path())) || 475 other.MatchesPath(StripTrailingWildcard(path()))); 476 } 477 478 bool URLPattern::Contains(const URLPattern& other) const { 479 if (match_all_urls()) 480 return true; 481 return MatchesAllSchemes(other.GetExplicitSchemes()) 482 && MatchesHost(other.host()) 483 && MatchesPortPattern(other.port()) 484 && MatchesPath(StripTrailingWildcard(other.path())); 485 } 486 487 bool URLPattern::MatchesAnyScheme( 488 const std::vector<std::string>& schemes) const { 489 for (std::vector<std::string>::const_iterator i = schemes.begin(); 490 i != schemes.end(); ++i) { 491 if (MatchesScheme(*i)) 492 return true; 493 } 494 495 return false; 496 } 497 498 bool URLPattern::MatchesAllSchemes( 499 const std::vector<std::string>& schemes) const { 500 for (std::vector<std::string>::const_iterator i = schemes.begin(); 501 i != schemes.end(); ++i) { 502 if (!MatchesScheme(*i)) 503 return false; 504 } 505 506 return true; 507 } 508 509 bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { 510 // Ignore hostname if scheme is file://. 511 if (scheme_ != url::kFileScheme && !MatchesHost(test)) 512 return false; 513 514 if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort()))) 515 return false; 516 517 return true; 518 } 519 520 bool URLPattern::MatchesPortPattern(const std::string& port) const { 521 return port_ == "*" || port_ == port; 522 } 523 524 std::vector<std::string> URLPattern::GetExplicitSchemes() const { 525 std::vector<std::string> result; 526 527 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { 528 result.push_back(scheme_); 529 return result; 530 } 531 532 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 533 if (MatchesScheme(kValidSchemes[i])) { 534 result.push_back(kValidSchemes[i]); 535 } 536 } 537 538 return result; 539 } 540 541 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { 542 std::vector<std::string> explicit_schemes = GetExplicitSchemes(); 543 std::vector<URLPattern> result; 544 545 for (std::vector<std::string>::const_iterator i = explicit_schemes.begin(); 546 i != explicit_schemes.end(); ++i) { 547 URLPattern temp = *this; 548 temp.SetScheme(*i); 549 temp.SetMatchAllURLs(false); 550 result.push_back(temp); 551 } 552 553 return result; 554 } 555 556 // static 557 const char* URLPattern::GetParseResultString( 558 URLPattern::ParseResult parse_result) { 559 return kParseResultMessages[parse_result]; 560 } 561