// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
#define EXTENSIONS_COMMON_URL_PATTERN_H_

#include <functional>
#include <iosfwd>
#include <string>
#include <vector>

class GURL;

// A pattern that can be used to match URLs. A URLPattern is a very restricted
// subset of URL syntax:
//
// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
//             'chrome-extension' | 'filesystem'
// <host> := '*' | '*.' <anychar except '/' and '*'>+
// <port> := [':' ('*' | <port number between 0 and 65535>)]
// <path> := '/' <any chars>
//
// * Host is not used when the scheme is 'file'.
// * The path can have embedded '*' characters which act as glob wildcards.
// * '<all_urls>' is a special pattern that matches any URL that contains a
//   valid scheme (as specified by valid_schemes_).
// * The '*' scheme pattern excludes file URLs.
//
// Examples of valid patterns:
// - http://*/*
// - http://*/foo*
// - https://*.google.com/foo*bar
// - file://monkey*
// - http://127.0.0.1/*
//
// Examples of invalid patterns:
// - http://* -- path not specified
// - http://*foo/bar -- * not allowed as substring of host component
// - http://foo.*.bar/baz -- * must be first component
// - http:/bar -- scheme separator not found
// - foo://* -- invalid scheme
// - chrome:// -- we don't support chrome internal URLs
class URLPattern {
 public:
  // A collection of scheme bitmasks for use with valid_schemes. Individual
  // values occupy distinct bits and may be OR-ed together.
  enum SchemeMasks {
    SCHEME_NONE = 0,
    SCHEME_HTTP = 1 << 0,
    SCHEME_HTTPS = 1 << 1,
    SCHEME_FILE = 1 << 2,
    SCHEME_FTP = 1 << 3,
    SCHEME_CHROMEUI = 1 << 4,
    SCHEME_EXTENSION = 1 << 5,
    SCHEME_FILESYSTEM = 1 << 6,

    // IMPORTANT!
    // SCHEME_ALL will match every scheme, including chrome://, chrome-
    // extension://, about:, etc. Because this has lots of security
    // implications, third-party extensions should usually not be able to get
    // access to URL patterns initialized this way. If there is a reason
    // for violating this general rule, document why this is safe.
    SCHEME_ALL = -1,
  };

  // Error codes returned from Parse(). NUM_PARSE_RESULTS is a count sentinel
  // and must remain the last enumerator.
  enum ParseResult {
    PARSE_SUCCESS = 0,
    PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
    PARSE_ERROR_INVALID_SCHEME,
    PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
    PARSE_ERROR_EMPTY_HOST,
    PARSE_ERROR_INVALID_HOST_WILDCARD,
    PARSE_ERROR_EMPTY_PATH,
    PARSE_ERROR_INVALID_PORT,
    PARSE_ERROR_INVALID_HOST,
    NUM_PARSE_RESULTS
  };

  // The <all_urls> string pattern.
  static const char kAllUrlsPattern[];

  // Returns true if the given |scheme| is considered valid for extensions.
  static bool IsValidSchemeForExtensions(const std::string& scheme);

  // Constructs a pattern from a |valid_schemes| bitmask (a combination of
  // SchemeMasks values). Defined out of line.
  explicit URLPattern(int valid_schemes);

  // Convenience to construct a URLPattern from a string. If the string is not
  // known ahead of time, use Parse() instead, which returns success or failure.
  URLPattern(int valid_schemes, const std::string& pattern);

  URLPattern();
  ~URLPattern();

  // Ordering and equality comparisons between patterns. Defined out of line.
  bool operator<(const URLPattern& other) const;
  bool operator>(const URLPattern& other) const;
  bool operator==(const URLPattern& other) const;

  // Initializes this instance by parsing the provided string. Returns
  // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
  // failure, this instance will have some intermediate values and is in an
  // invalid state.
  ParseResult Parse(const std::string& pattern_str);

  // Gets the bitmask of valid schemes.
  int valid_schemes() const { return valid_schemes_; }
  void SetValidSchemes(int valid_schemes);

  // Gets the host the pattern matches. This can be an empty string if the
  // pattern matches all hosts (the input was <scheme>://*/<whatever>).
  const std::string& host() const { return host_; }
  void SetHost(const std::string& host);

  // Gets whether to match subdomains of host().
  bool match_subdomains() const { return match_subdomains_; }
  void SetMatchSubdomains(bool val);

  // Gets the path the pattern matches with the leading slash. This can have
  // embedded asterisks which are interpreted using glob rules.
  const std::string& path() const { return path_; }
  void SetPath(const std::string& path);

  // Returns true if this pattern matches all urls.
  bool match_all_urls() const { return match_all_urls_; }
  void SetMatchAllURLs(bool val);

  // Sets the scheme for pattern matches. This can be a single '*' if the
  // pattern matches all valid schemes (as defined by the valid_schemes_
  // property). Returns false on failure (if the scheme is not valid).
  bool SetScheme(const std::string& scheme);
  // Note: You should use MatchesScheme() instead of this getter unless you
  // absolutely need the exact scheme. This is exposed for testing.
  const std::string& scheme() const { return scheme_; }

  // Returns true if the specified scheme can be used in this URL pattern, and
  // false otherwise. Uses valid_schemes_ to determine validity.
  bool IsValidScheme(const std::string& scheme) const;

  // Returns true if this instance matches the specified URL.
  bool MatchesURL(const GURL& test) const;

  // Returns true if this instance matches the specified security origin.
  bool MatchesSecurityOrigin(const GURL& test) const;

  // Returns true if |test| matches our scheme.
  // Note that if test is "filesystem", this may fail whereas MatchesURL
  // may succeed. MatchesURL is smart enough to look at the inner_url instead
  // of the outer "filesystem:" part.
  bool MatchesScheme(const std::string& test) const;

  // Returns true if |test| matches our host.
  bool MatchesHost(const std::string& test) const;
  bool MatchesHost(const GURL& test) const;

  // Returns true if |test| matches our path.
  bool MatchesPath(const std::string& test) const;

  // Returns true if the pattern is vague enough that it implies all hosts,
  // such as *://*/*.
  // This is an expensive method, and should be used sparingly!
  // You should probably use URLPatternSet::ShouldWarnAllHosts(), which is
  // cached.
  bool ImpliesAllHosts() const;

  // Returns true if the pattern only matches a single origin. The pattern may
  // include a path.
  bool MatchesSingleOrigin() const;

  // Sets the port. Returns false if the port is invalid.
  bool SetPort(const std::string& port);
  const std::string& port() const { return port_; }

  // Returns a string representing this instance.
  const std::string& GetAsString() const;

  // Determines whether there is a URL that would match this instance and
  // another instance. This method is symmetrical: Calling
  // other.OverlapsWith(this) would result in the same answer.
  bool OverlapsWith(const URLPattern& other) const;

  // Returns true if this pattern matches all possible URLs that |other| can
  // match. For example, http://*.google.com encompasses http://www.google.com.
  bool Contains(const URLPattern& other) const;

  // Converts this URLPattern into an equivalent set of URLPatterns that don't
  // use a wildcard in the scheme component. If this URLPattern doesn't use a
  // wildcard scheme, then the returned set will contain one element that is
  // equivalent to this instance.
  std::vector<URLPattern> ConvertToExplicitSchemes() const;

  // Ordering predicate that looks only at the host component. Two
  // "<all_urls>" patterns are never ordered before one another; in every
  // other case the result is the lexicographic comparison of the two host_
  // strings. Scheme, port, path and match_subdomains_ are not consulted.
  static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
    if (a.match_all_urls_ && b.match_all_urls_)
      return false;
    return a.host_.compare(b.host_) < 0;
  }

  // Used for origin comparisons in a std::set. Forwards to
  // EffectiveHostCompare().
  class EffectiveHostCompareFunctor {
   public:
    bool operator()(const URLPattern& a, const URLPattern& b) const {
      return EffectiveHostCompare(a, b);
    }
  };

  // Get an error string for a ParseResult.
  static const char* GetParseResultString(URLPattern::ParseResult parse_result);

 private:
  // Returns true if any of the |schemes| items matches our scheme.
  bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;

  // Returns true if all of the |schemes| items match our scheme.
  bool MatchesAllSchemes(const std::vector<std::string>& schemes) const;

  // NOTE(review): presumably the shared implementation behind
  // MatchesSecurityOrigin(); confirm against the .cc file.
  bool MatchesSecurityOriginHelper(const GURL& test) const;

  // Returns true if our port matches the |port| pattern (it may be "*").
  bool MatchesPortPattern(const std::string& port) const;

  // If the URLPattern contains a wildcard scheme, returns a list of
  // equivalent literal schemes, otherwise returns the current scheme.
  std::vector<std::string> GetExplicitSchemes() const;

  // A bitmask containing the schemes which are considered valid for this
  // pattern. Parse() uses this to decide whether a pattern contains a valid
  // scheme.
  int valid_schemes_;

  // True if this is a special-case "<all_urls>" pattern.
  bool match_all_urls_;

  // The scheme for the pattern.
  std::string scheme_;

  // The host without any leading "*" components.
  std::string host_;

  // Whether we should match subdomains of the host. This is true if the first
  // component of the pattern's host was "*".
  bool match_subdomains_;

  // The port.
  std::string port_;

  // The path to match. This is everything after the host of the URL, or
  // everything after the scheme in the case of file:// URLs.
  std::string path_;

  // The path with "?" and "\" characters escaped for use with the
  // MatchPattern() function.
  std::string path_escaped_;

  // A string representing this URLPattern. Declared mutable so that const
  // members (such as GetAsString()) are able to update it.
  mutable std::string spec_;
};

// Streams |url_pattern| to |out|. Defined out of line.
std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern);

typedef std::vector<URLPattern> URLPatternList;

#endif  // EXTENSIONS_COMMON_URL_PATTERN_H_