Home | History | Annotate | Download | only in matcher
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef EXTENSIONS_COMMON_MATCHER_REGEX_SET_MATCHER_H_
      6 #define EXTENSIONS_COMMON_MATCHER_REGEX_SET_MATCHER_H_
      7 
      8 #include <map>
      9 #include <set>
     10 #include <string>
     11 #include <vector>
     12 
     13 #include "base/memory/scoped_ptr.h"
     14 #include "extensions/common/matcher/string_pattern.h"
     15 #include "extensions/common/matcher/substring_set_matcher.h"
     16 
     17 namespace re2 {
     18 class FilteredRE2;
     19 }
     20 
     21 namespace extensions {
     22 
     23 // Efficiently matches URLs against a collection of regular expressions,
     24 // using FilteredRE2 to reduce the number of regexes that must be matched
     25 // by pre-filtering with substring matching. See:
     26 // http://swtch.com/~rsc/regexp/regexp3.html#analysis
     27 class RegexSetMatcher {
     28  public:
     29   RegexSetMatcher();
     30   virtual ~RegexSetMatcher();
     31 
     32   // Adds the regex patterns in |regex_list| to the matcher. Also rebuilds
     33   // the FilteredRE2 matcher; thus, for efficiency, prefer adding multiple
     34   // patterns at once.
     35   // Ownership of the patterns remains with the caller.
     36   void AddPatterns(const std::vector<const StringPattern*>& regex_list);
     37 
     38   // Removes all regex patterns.
     39   void ClearPatterns();
     40 
     41   // Appends the IDs of regular expressions in our set that match the |text|
     42   // to |matches|.
     43   bool Match(const std::string& text,
     44              std::set<StringPattern::ID>* matches) const;
     45 
     46   bool IsEmpty() const;
     47 
     48  private:
     49   typedef int RE2ID;
     50   typedef std::map<StringPattern::ID, const StringPattern*> RegexMap;
     51   typedef std::vector<StringPattern::ID> RE2IDMap;
     52 
     53   // Use Aho-Corasick SubstringSetMatcher to find which literal patterns
     54   // match the |text|.
     55   std::vector<RE2ID> FindSubstringMatches(const std::string& text) const;
     56 
     57   // Rebuild FilteredRE2 from scratch. Needs to be called whenever
     58   // our set of regexes changes.
     59   // TODO(yoz): investigate if it could be done incrementally;
     60   // apparently not supported by FilteredRE2.
     61   void RebuildMatcher();
     62 
     63   // Clean up StringPatterns in |substring_patterns_|.
     64   void DeleteSubstringPatterns();
     65 
     66   // Mapping of regex StringPattern::IDs to regexes.
     67   RegexMap regexes_;
     68   // Mapping of RE2IDs from FilteredRE2 (which are assigned in order)
     69   // to regex StringPattern::IDs.
     70   RE2IDMap re2_id_map_;
     71 
     72   scoped_ptr<re2::FilteredRE2> filtered_re2_;
     73   scoped_ptr<SubstringSetMatcher> substring_matcher_;
     74 
     75   // The substring patterns from FilteredRE2, which are used in
     76   // |substring_matcher_| but whose lifetime is managed here.
     77   std::vector<const StringPattern*> substring_patterns_;
     78 };
     79 
     80 }  // namespace extensions
     81 
     82 #endif  // EXTENSIONS_COMMON_MATCHER_REGEX_SET_MATCHER_H_
     83