Home | History | Annotate | Download | only in url_matcher
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "components/url_matcher/regex_set_matcher.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/stl_util.h"
      9 #include "base/strings/string_util.h"
     10 #include "components/url_matcher/substring_set_matcher.h"
     11 #include "third_party/re2/re2/filtered_re2.h"
     12 #include "third_party/re2/re2/re2.h"
     13 
     14 namespace url_matcher {
     15 
     16 RegexSetMatcher::RegexSetMatcher() {}
     17 
     18 RegexSetMatcher::~RegexSetMatcher() {
     19   DeleteSubstringPatterns();
     20 }
     21 
     22 void RegexSetMatcher::AddPatterns(
     23     const std::vector<const StringPattern*>& regex_list) {
     24   if (regex_list.empty())
     25     return;
     26   for (size_t i = 0; i < regex_list.size(); ++i) {
     27     regexes_[regex_list[i]->id()] = regex_list[i];
     28   }
     29 
     30   RebuildMatcher();
     31 }
     32 
     33 void RegexSetMatcher::ClearPatterns() {
     34   regexes_.clear();
     35   RebuildMatcher();
     36 }
     37 
     38 bool RegexSetMatcher::Match(const std::string& text,
     39                             std::set<StringPattern::ID>* matches) const {
     40   size_t old_number_of_matches = matches->size();
     41   if (regexes_.empty())
     42     return false;
     43   if (!filtered_re2_.get()) {
     44     LOG(ERROR) << "RegexSetMatcher was not initialized";
     45     return false;
     46   }
     47 
     48   // FilteredRE2 expects lowercase for prefiltering, but we still
     49   // match case-sensitively.
     50   std::vector<RE2ID> atoms(FindSubstringMatches(
     51       base::StringToLowerASCII(text)));
     52 
     53   std::vector<RE2ID> re2_ids;
     54   filtered_re2_->AllMatches(text, atoms, &re2_ids);
     55 
     56   for (size_t i = 0; i < re2_ids.size(); ++i) {
     57     StringPattern::ID id = re2_id_map_[re2_ids[i]];
     58     matches->insert(id);
     59   }
     60   return old_number_of_matches != matches->size();
     61 }
     62 
     63 bool RegexSetMatcher::IsEmpty() const {
     64   return regexes_.empty();
     65 }
     66 
     67 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
     68     const std::string& text) const {
     69   std::set<int> atoms_set;
     70   substring_matcher_->Match(text, &atoms_set);
     71   return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end());
     72 }
     73 
     74 void RegexSetMatcher::RebuildMatcher() {
     75   re2_id_map_.clear();
     76   filtered_re2_.reset(new re2::FilteredRE2());
     77   if (regexes_.empty())
     78     return;
     79 
     80   for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) {
     81     RE2ID re2_id;
     82     RE2::ErrorCode error = filtered_re2_->Add(
     83         it->second->pattern(), RE2::DefaultOptions, &re2_id);
     84     if (error == RE2::NoError) {
     85       DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
     86       re2_id_map_.push_back(it->first);
     87     } else {
     88       // Unparseable regexes should have been rejected already in
     89       // URLMatcherFactory::CreateURLMatchesCondition.
     90       LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
     91                  << it->second->pattern() << ")";
     92     }
     93   }
     94 
     95   std::vector<std::string> strings_to_match;
     96   filtered_re2_->Compile(&strings_to_match);
     97 
     98   substring_matcher_.reset(new SubstringSetMatcher);
     99   DeleteSubstringPatterns();
    100   // Build SubstringSetMatcher from |strings_to_match|.
    101   // SubstringSetMatcher doesn't own its strings.
    102   for (size_t i = 0; i < strings_to_match.size(); ++i) {
    103     substring_patterns_.push_back(
    104         new StringPattern(strings_to_match[i], i));
    105   }
    106   substring_matcher_->RegisterPatterns(substring_patterns_);
    107 }
    108 
    109 void RegexSetMatcher::DeleteSubstringPatterns() {
    110   STLDeleteElements(&substring_patterns_);
    111 }
    112 
    113 }  // namespace url_matcher
    114