Home | History | Annotate | Download | only in matcher
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "extensions/common/matcher/regex_set_matcher.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/stl_util.h"
      9 #include "base/strings/string_util.h"
     10 #include "extensions/common/matcher/substring_set_matcher.h"
     11 #include "third_party/re2/re2/filtered_re2.h"
     12 #include "third_party/re2/re2/re2.h"
     13 
     14 namespace extensions {
     15 
     16 RegexSetMatcher::RegexSetMatcher() {}
     17 
     18 RegexSetMatcher::~RegexSetMatcher() {
     19   DeleteSubstringPatterns();
     20 }
     21 
     22 void RegexSetMatcher::AddPatterns(
     23     const std::vector<const StringPattern*>& regex_list) {
     24   if (regex_list.empty())
     25     return;
     26   for (size_t i = 0; i < regex_list.size(); ++i) {
     27     regexes_[regex_list[i]->id()] = regex_list[i];
     28   }
     29 
     30   RebuildMatcher();
     31 }
     32 
     33 void RegexSetMatcher::ClearPatterns() {
     34   regexes_.clear();
     35   RebuildMatcher();
     36 }
     37 
     38 bool RegexSetMatcher::Match(const std::string& text,
     39                             std::set<StringPattern::ID>* matches) const {
     40   size_t old_number_of_matches = matches->size();
     41   if (regexes_.empty())
     42     return false;
     43   if (!filtered_re2_.get()) {
     44     LOG(ERROR) << "RegexSetMatcher was not initialized";
     45     return false;
     46   }
     47 
     48   // FilteredRE2 expects lowercase for prefiltering, but we still
     49   // match case-sensitively.
     50   std::vector<RE2ID> atoms(FindSubstringMatches(
     51       StringToLowerASCII(text)));
     52 
     53   std::vector<RE2ID> re2_ids;
     54   filtered_re2_->AllMatches(text, atoms, &re2_ids);
     55 
     56   std::set<StringPattern::ID> matched_ids;
     57   for (size_t i = 0; i < re2_ids.size(); ++i) {
     58     StringPattern::ID id = re2_id_map_[re2_ids[i]];
     59     matches->insert(id);
     60   }
     61   return old_number_of_matches != matches->size();
     62 }
     63 
     64 bool RegexSetMatcher::IsEmpty() const {
     65   return regexes_.empty();
     66 }
     67 
     68 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches(
     69     const std::string& text) const {
     70   std::set<int> atoms_set;
     71   substring_matcher_->Match(text, &atoms_set);
     72   return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end());
     73 }
     74 
     75 void RegexSetMatcher::RebuildMatcher() {
     76   re2_id_map_.clear();
     77   filtered_re2_.reset(new re2::FilteredRE2());
     78   if (regexes_.empty())
     79     return;
     80 
     81   for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) {
     82     RE2ID re2_id;
     83     RE2::ErrorCode error = filtered_re2_->Add(
     84         it->second->pattern(), RE2::DefaultOptions, &re2_id);
     85     if (error == RE2::NoError) {
     86       DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id);
     87       re2_id_map_.push_back(it->first);
     88     } else {
     89       // Unparseable regexes should have been rejected already in
     90       // URLMatcherFactory::CreateURLMatchesCondition.
     91       LOG(ERROR) << "Could not parse regex (id=" << it->first << ", "
     92                  << it->second->pattern() << ")";
     93     }
     94   }
     95 
     96   std::vector<std::string> strings_to_match;
     97   filtered_re2_->Compile(&strings_to_match);
     98 
     99   substring_matcher_.reset(new SubstringSetMatcher);
    100   DeleteSubstringPatterns();
    101   // Build SubstringSetMatcher from |strings_to_match|.
    102   // SubstringSetMatcher doesn't own its strings.
    103   for (size_t i = 0; i < strings_to_match.size(); ++i) {
    104     substring_patterns_.push_back(
    105         new StringPattern(strings_to_match[i], i));
    106   }
    107   substring_matcher_->RegisterPatterns(substring_patterns_);
    108 }
    109 
    110 void RegexSetMatcher::DeleteSubstringPatterns() {
    111   STLDeleteElements(&substring_patterns_);
    112 }
    113 
    114 }  // namespace extensions
    115