Home | History | Annotate | Download | only in common
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 #ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
      5 #define EXTENSIONS_COMMON_URL_PATTERN_H_
      6 
      7 #include <functional>
      8 #include <string>
      9 #include <vector>
     10 
     11 class GURL;
     12 
     13 // A pattern that can be used to match URLs. A URLPattern is a very restricted
     14 // subset of URL syntax:
     15 //
     16 // <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
     17 // <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
     18 //             'chrome-extension' | 'filesystem'
     19 // <host> := '*' | '*.' <anychar except '/' and '*'>+
     20 // <port> := [':' ('*' | <port number between 0 and 65535>)]
     21 // <path> := '/' <any chars>
     22 //
     23 // * Host is not used when the scheme is 'file'.
     24 // * The path can have embedded '*' characters which act as glob wildcards.
     25 // * '<all_urls>' is a special pattern that matches any URL that contains a
     26 //   valid scheme (as specified by valid_schemes_).
     27 // * The '*' scheme pattern excludes file URLs.
     28 //
     29 // Examples of valid patterns:
     30 // - http://*/*
     31 // - http://*/foo*
     32 // - https://*.google.com/foo*bar
     33 // - file://monkey*
     34 // - http://127.0.0.1/*
     35 //
     36 // Examples of invalid patterns:
     37 // - http://* -- path not specified
     38 // - http://*foo/bar -- * not allowed as substring of host component
     39 // - http://foo.*.bar/baz -- * must be first component
     40 // - http:/bar -- scheme separator not found
     41 // - foo://* -- invalid scheme
     42 // - chrome:// -- we don't support chrome internal URLs
     43 class URLPattern {
     44  public:
     45   // A collection of scheme bitmasks for use with valid_schemes.
     46   enum SchemeMasks {
     47     SCHEME_NONE       = 0,
     48     SCHEME_HTTP       = 1 << 0,
     49     SCHEME_HTTPS      = 1 << 1,
     50     SCHEME_FILE       = 1 << 2,
     51     SCHEME_FTP        = 1 << 3,
     52     SCHEME_CHROMEUI   = 1 << 4,
     53     SCHEME_EXTENSION  = 1 << 5,
     54     SCHEME_FILESYSTEM = 1 << 6,
     55 
     56     // IMPORTANT!
     57     // SCHEME_ALL will match every scheme, including chrome://, chrome-
     58     // extension://, about:, etc. Because this has lots of security
     59     // implications, third-party extensions should usually not be able to get
     60     // access to URL patterns initialized this way. If there is a reason
     61     // for violating this general rule, document why this it safe.
     62     SCHEME_ALL      = -1,
     63   };
     64 
     65   // Error codes returned from Parse().
     66   enum ParseResult {
     67     PARSE_SUCCESS = 0,
     68     PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
     69     PARSE_ERROR_INVALID_SCHEME,
     70     PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
     71     PARSE_ERROR_EMPTY_HOST,
     72     PARSE_ERROR_INVALID_HOST_WILDCARD,
     73     PARSE_ERROR_EMPTY_PATH,
     74     PARSE_ERROR_INVALID_PORT,
     75     NUM_PARSE_RESULTS
     76   };
     77 
     78   // The <all_urls> string pattern.
     79   static const char kAllUrlsPattern[];
     80 
     81   explicit URLPattern(int valid_schemes);
     82 
     83   // Convenience to construct a URLPattern from a string. If the string is not
     84   // known ahead of time, use Parse() instead, which returns success or failure.
     85   URLPattern(int valid_schemes, const std::string& pattern);
     86 
     87   URLPattern();
     88   ~URLPattern();
     89 
     90   bool operator<(const URLPattern& other) const;
     91   bool operator==(const URLPattern& other) const;
     92 
     93   // Initializes this instance by parsing the provided string. Returns
     94   // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
     95   // failure, this instance will have some intermediate values and is in an
     96   // invalid state.
     97   ParseResult Parse(const std::string& pattern_str);
     98 
     99   // Gets the bitmask of valid schemes.
    100   int valid_schemes() const { return valid_schemes_; }
    101   void SetValidSchemes(int valid_schemes);
    102 
    103   // Gets the host the pattern matches. This can be an empty string if the
    104   // pattern matches all hosts (the input was <scheme>://*/<whatever>).
    105   const std::string& host() const { return host_; }
    106   void SetHost(const std::string& host);
    107 
    108   // Gets whether to match subdomains of host().
    109   bool match_subdomains() const { return match_subdomains_; }
    110   void SetMatchSubdomains(bool val);
    111 
    112   // Gets the path the pattern matches with the leading slash. This can have
    113   // embedded asterisks which are interpreted using glob rules.
    114   const std::string& path() const { return path_; }
    115   void SetPath(const std::string& path);
    116 
    117   // Returns true if this pattern matches all urls.
    118   bool match_all_urls() const { return match_all_urls_; }
    119   void SetMatchAllURLs(bool val);
    120 
    121   // Sets the scheme for pattern matches. This can be a single '*' if the
    122   // pattern matches all valid schemes (as defined by the valid_schemes_
    123   // property). Returns false on failure (if the scheme is not valid).
    124   bool SetScheme(const std::string& scheme);
    125   // Note: You should use MatchesScheme() instead of this getter unless you
    126   // absolutely need the exact scheme. This is exposed for testing.
    127   const std::string& scheme() const { return scheme_; }
    128 
    129   // Returns true if the specified scheme can be used in this URL pattern, and
    130   // false otherwise. Uses valid_schemes_ to determine validity.
    131   bool IsValidScheme(const std::string& scheme) const;
    132 
    133   // Returns true if this instance matches the specified URL.
    134   bool MatchesURL(const GURL& test) const;
    135 
    136   // Returns true if this instance matches the specified security origin.
    137   bool MatchesSecurityOrigin(const GURL& test) const;
    138 
    139   // Returns true if |test| matches our scheme.
    140   // Note that if test is "filesystem", this may fail whereas MatchesURL
    141   // may succeed.  MatchesURL is smart enough to look at the inner_url instead
    142   // of the outer "filesystem:" part.
    143   bool MatchesScheme(const std::string& test) const;
    144 
    145   // Returns true if |test| matches our host.
    146   bool MatchesHost(const std::string& test) const;
    147   bool MatchesHost(const GURL& test) const;
    148 
    149   // Returns true if |test| matches our path.
    150   bool MatchesPath(const std::string& test) const;
    151 
    152   // Sets the port. Returns false if the port is invalid.
    153   bool SetPort(const std::string& port);
    154   const std::string& port() const { return port_; }
    155 
    156   // Returns a string representing this instance.
    157   const std::string& GetAsString() const;
    158 
    159   // Determines whether there is a URL that would match this instance and
    160   // another instance. This method is symmetrical: Calling
    161   // other.OverlapsWith(this) would result in the same answer.
    162   bool OverlapsWith(const URLPattern& other) const;
    163 
    164   // Returns true if this pattern matches all possible URLs that |other| can
    165   // match. For example, http://*.google.com encompasses http://www.google.com.
    166   bool Contains(const URLPattern& other) const;
    167 
    168   // Converts this URLPattern into an equivalent set of URLPatterns that don't
    169   // use a wildcard in the scheme component. If this URLPattern doesn't use a
    170   // wildcard scheme, then the returned set will contain one element that is
    171   // equivalent to this instance.
    172   std::vector<URLPattern> ConvertToExplicitSchemes() const;
    173 
    174   static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
    175     if (a.match_all_urls_ && b.match_all_urls_)
    176       return false;
    177     return a.host_.compare(b.host_) < 0;
    178   };
    179 
    180   // Used for origin comparisons in a std::set.
    181   class EffectiveHostCompareFunctor {
    182    public:
    183     bool operator()(const URLPattern& a, const URLPattern& b) const {
    184       return EffectiveHostCompare(a, b);
    185     };
    186   };
    187 
    188   // Get an error string for a ParseResult.
    189   static const char* GetParseResultString(URLPattern::ParseResult parse_result);
    190 
    191   // Checks whether the bit is set for the given scheme in the given scheme mask
    192   static bool IsSchemeBitSet(const std::string& scheme, const int mask);
    193 
    194  private:
    195   // Returns true if any of the |schemes| items matches our scheme.
    196   bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;
    197 
    198   // Returns true if all of the |schemes| items matches our scheme.
    199   bool MatchesAllSchemes(const std::vector<std::string>& schemes) const;
    200 
    201   bool MatchesSecurityOriginHelper(const GURL& test) const;
    202 
    203   // Returns true if our port matches the |port| pattern (it may be "*").
    204   bool MatchesPortPattern(const std::string& port) const;
    205 
    206   // If the URLPattern contains a wildcard scheme, returns a list of
    207   // equivalent literal schemes, otherwise returns the current scheme.
    208   std::vector<std::string> GetExplicitSchemes() const;
    209 
    210   // A bitmask containing the schemes which are considered valid for this
    211   // pattern. Parse() uses this to decide whether a pattern contains a valid
    212   // scheme.
    213   int valid_schemes_;
    214 
    215   // True if this is a special-case "<all_urls>" pattern.
    216   bool match_all_urls_;
    217 
    218   // The scheme for the pattern.
    219   std::string scheme_;
    220 
    221   // The host without any leading "*" components.
    222   std::string host_;
    223 
    224   // Whether we should match subdomains of the host. This is true if the first
    225   // component of the pattern's host was "*".
    226   bool match_subdomains_;
    227 
    228   // The port.
    229   std::string port_;
    230 
    231   // The path to match. This is everything after the host of the URL, or
    232   // everything after the scheme in the case of file:// URLs.
    233   std::string path_;
    234 
    235   // The path with "?" and "\" characters escaped for use with the
    236   // MatchPattern() function.
    237   std::string path_escaped_;
    238 
    239   // A string representing this URLPattern.
    240   mutable std::string spec_;
    241 };
    242 
    243 typedef std::vector<URLPattern> URLPatternList;
    244 
    245 #endif  // EXTENSIONS_COMMON_URL_PATTERN_H_
    246