1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/common/extensions/url_pattern.h" 6 7 #include "base/string_piece.h" 8 #include "base/string_split.h" 9 #include "base/string_util.h" 10 #include "chrome/common/url_constants.h" 11 #include "googleurl/src/gurl.h" 12 #include "googleurl/src/url_util.h" 13 14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; 15 16 namespace { 17 18 // TODO(aa): Consider adding chrome-extension? What about more obscure ones 19 // like data: and javascript: ? 20 // Note: keep this array in sync with kValidSchemeMasks. 21 const char* kValidSchemes[] = { 22 chrome::kHttpScheme, 23 chrome::kHttpsScheme, 24 chrome::kFileScheme, 25 chrome::kFtpScheme, 26 chrome::kChromeUIScheme, 27 chrome::kFileSystemScheme, 28 }; 29 30 const int kValidSchemeMasks[] = { 31 URLPattern::SCHEME_HTTP, 32 URLPattern::SCHEME_HTTPS, 33 URLPattern::SCHEME_FILE, 34 URLPattern::SCHEME_FTP, 35 URLPattern::SCHEME_CHROMEUI, 36 URLPattern::SCHEME_FILESYSTEM, 37 }; 38 39 COMPILE_ASSERT(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), 40 must_keep_these_arrays_in_sync); 41 42 const char* kParseSuccess = "Success."; 43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; 44 const char* kParseErrorInvalidScheme = "Invalid scheme."; 45 const char* kParseErrorWrongSchemeType = "Wrong scheme type."; 46 const char* kParseErrorEmptyHost = "Host can not be empty."; 47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; 48 const char* kParseErrorEmptyPath = "Empty path."; 49 const char* kParseErrorHasColon = 50 "Ports are not supported in URL patterns. ':' may not be used in a host."; 51 52 // Message explaining each URLPattern::ParseResult. 53 const char* kParseResultMessages[] = { 54 kParseSuccess, 55 kParseErrorMissingSchemeSeparator, 56 kParseErrorInvalidScheme, 57 kParseErrorWrongSchemeType, 58 kParseErrorEmptyHost, 59 kParseErrorInvalidHostWildcard, 60 kParseErrorEmptyPath, 61 kParseErrorHasColon 62 }; 63 64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), 65 must_add_message_for_each_parse_result); 66 67 const char kPathSeparator[] = "/"; 68 69 bool IsStandardScheme(const std::string& scheme) { 70 // "*" gets the same treatment as a standard scheme. 71 if (scheme == "*") 72 return true; 73 74 return url_util::IsStandard(scheme.c_str(), 75 url_parse::Component(0, static_cast<int>(scheme.length()))); 76 } 77 78 } // namespace 79 80 URLPattern::URLPattern() 81 : valid_schemes_(SCHEME_NONE), 82 match_all_urls_(false), 83 match_subdomains_(false) {} 84 85 URLPattern::URLPattern(int valid_schemes) 86 : valid_schemes_(valid_schemes), match_all_urls_(false), 87 match_subdomains_(false) {} 88 89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) 90 : valid_schemes_(valid_schemes), match_all_urls_(false), 91 match_subdomains_(false) { 92 93 // Strict error checking is used, because this constructor is only 94 // appropriate when we know |pattern| is valid. 95 if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT)) 96 NOTREACHED() << "URLPattern is invalid: " << pattern; 97 } 98 99 URLPattern::~URLPattern() { 100 } 101 102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, 103 ParseOption strictness) { 104 CHECK(strictness == PARSE_LENIENT || 105 strictness == PARSE_STRICT); 106 107 // Special case pattern to match every valid URL. 108 if (pattern == kAllUrlsPattern) { 109 match_all_urls_ = true; 110 match_subdomains_ = true; 111 scheme_ = "*"; 112 host_.clear(); 113 SetPath("/*"); 114 return PARSE_SUCCESS; 115 } 116 117 // Parse out the scheme. 118 size_t scheme_end_pos = pattern.find(chrome::kStandardSchemeSeparator); 119 bool has_standard_scheme_separator = true; 120 121 // Some urls also use ':' alone as the scheme separator. 122 if (scheme_end_pos == std::string::npos) { 123 scheme_end_pos = pattern.find(':'); 124 has_standard_scheme_separator = false; 125 } 126 127 if (scheme_end_pos == std::string::npos) 128 return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; 129 130 if (!SetScheme(pattern.substr(0, scheme_end_pos))) 131 return PARSE_ERROR_INVALID_SCHEME; 132 133 bool standard_scheme = IsStandardScheme(scheme_); 134 if (standard_scheme != has_standard_scheme_separator) 135 return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; 136 137 // Advance past the scheme separator. 138 scheme_end_pos += 139 (standard_scheme ? strlen(chrome::kStandardSchemeSeparator) : 1); 140 if (scheme_end_pos >= pattern.size()) 141 return PARSE_ERROR_EMPTY_HOST; 142 143 // Parse out the host and path. 144 size_t host_start_pos = scheme_end_pos; 145 size_t path_start_pos = 0; 146 147 // File URLs are special because they have no host. 148 if (scheme_ == chrome::kFileScheme || !standard_scheme) { 149 path_start_pos = host_start_pos; 150 } else { 151 size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); 152 153 // Host is required. 154 if (host_start_pos == host_end_pos) 155 return PARSE_ERROR_EMPTY_HOST; 156 157 if (host_end_pos == std::string::npos) 158 return PARSE_ERROR_EMPTY_PATH; 159 160 host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); 161 162 // The first component can optionally be '*' to match all subdomains. 163 std::vector<std::string> host_components; 164 base::SplitString(host_, '.', &host_components); 165 if (host_components[0] == "*") { 166 match_subdomains_ = true; 167 host_components.erase(host_components.begin(), 168 host_components.begin() + 1); 169 } 170 host_ = JoinString(host_components, '.'); 171 172 // No other '*' can occur in the host, though. This isn't necessary, but is 173 // done as a convenience to developers who might otherwise be confused and 174 // think '*' works as a glob in the host. 175 if (host_.find('*') != std::string::npos) 176 return PARSE_ERROR_INVALID_HOST_WILDCARD; 177 178 path_start_pos = host_end_pos; 179 } 180 181 SetPath(pattern.substr(path_start_pos)); 182 183 if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos) 184 return PARSE_ERROR_HAS_COLON; 185 186 return PARSE_SUCCESS; 187 } 188 189 bool URLPattern::SetScheme(const std::string& scheme) { 190 scheme_ = scheme; 191 if (scheme_ == "*") { 192 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); 193 } else if (!IsValidScheme(scheme_)) { 194 return false; 195 } 196 return true; 197 } 198 199 bool URLPattern::IsValidScheme(const std::string& scheme) const { 200 if (valid_schemes_ == SCHEME_ALL) 201 return true; 202 203 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 204 if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) 205 return true; 206 } 207 208 return false; 209 } 210 211 void URLPattern::SetPath(const std::string& path) { 212 path_ = path; 213 path_escaped_ = path_; 214 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); 215 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); 216 } 217 218 bool URLPattern::MatchesUrl(const GURL &test) const { 219 if (!MatchesScheme(test.scheme())) 220 return false; 221 222 if (match_all_urls_) 223 return true; 224 225 if (!MatchesHost(test)) 226 return false; 227 228 if (!MatchesPath(test.PathForRequest())) 229 return false; 230 231 return true; 232 } 233 234 bool URLPattern::MatchesScheme(const std::string& test) const { 235 if (!IsValidScheme(test)) 236 return false; 237 238 return scheme_ == "*" || test == scheme_; 239 } 240 241 bool URLPattern::MatchesHost(const std::string& host) const { 242 std::string test(chrome::kHttpScheme); 243 test += chrome::kStandardSchemeSeparator; 244 test += host; 245 test += "/"; 246 return MatchesHost(GURL(test)); 247 } 248 249 bool URLPattern::MatchesHost(const GURL& test) const { 250 // If the hosts are exactly equal, we have a match. 251 if (test.host() == host_) 252 return true; 253 254 // If we're matching subdomains, and we have no host in the match pattern, 255 // that means that we're matching all hosts, which means we have a match no 256 // matter what the test host is. 257 if (match_subdomains_ && host_.empty()) 258 return true; 259 260 // Otherwise, we can only match if our match pattern matches subdomains. 261 if (!match_subdomains_) 262 return false; 263 264 // We don't do subdomain matching against IP addresses, so we can give up now 265 // if the test host is an IP address. 266 if (test.HostIsIPAddress()) 267 return false; 268 269 // Check if the test host is a subdomain of our host. 270 if (test.host().length() <= (host_.length() + 1)) 271 return false; 272 273 if (test.host().compare(test.host().length() - host_.length(), 274 host_.length(), host_) != 0) 275 return false; 276 277 return test.host()[test.host().length() - host_.length() - 1] == '.'; 278 } 279 280 bool URLPattern::MatchesPath(const std::string& test) const { 281 if (!MatchPattern(test, path_escaped_)) 282 return false; 283 284 return true; 285 } 286 287 std::string URLPattern::GetAsString() const { 288 if (match_all_urls_) 289 return kAllUrlsPattern; 290 291 bool standard_scheme = IsStandardScheme(scheme_); 292 293 std::string spec = scheme_ + 294 (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); 295 296 if (scheme_ != chrome::kFileScheme && standard_scheme) { 297 if (match_subdomains_) { 298 spec += "*"; 299 if (!host_.empty()) 300 spec += "."; 301 } 302 303 if (!host_.empty()) 304 spec += host_; 305 } 306 307 if (!path_.empty()) 308 spec += path_; 309 310 return spec; 311 } 312 313 bool URLPattern::OverlapsWith(const URLPattern& other) const { 314 if (!MatchesScheme(other.scheme_) && !other.MatchesScheme(scheme_)) 315 return false; 316 317 if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) 318 return false; 319 320 // We currently only use OverlapsWith() for the patterns inside 321 // ExtensionExtent. In those cases, we know that the path will have only a 322 // single wildcard at the end. This makes figuring out overlap much easier. It 323 // seems like there is probably a computer-sciency way to solve the general 324 // case, but we don't need that yet. 325 DCHECK(path_.find('*') == path_.size() - 1); 326 DCHECK(other.path().find('*') == other.path().size() - 1); 327 328 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && 329 !other.MatchesPath(path_.substr(0, path_.size() - 1))) 330 return false; 331 332 return true; 333 } 334 335 std::vector<URLPattern> URLPattern::ConvertToExplicitSchemes() const { 336 std::vector<URLPattern> result; 337 338 if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { 339 result.push_back(*this); 340 return result; 341 } 342 343 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 344 if (MatchesScheme(kValidSchemes[i])) { 345 URLPattern temp = *this; 346 temp.SetScheme(kValidSchemes[i]); 347 temp.set_match_all_urls(false); 348 result.push_back(temp); 349 } 350 } 351 352 return result; 353 } 354 355 // static 356 const char* URLPattern::GetParseResultString( 357 URLPattern::ParseResult parse_result) { 358 return kParseResultMessages[parse_result]; 359 } 360