1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "extensions/common/matcher/regex_set_matcher.h" 6 7 #include "base/logging.h" 8 #include "base/stl_util.h" 9 #include "base/strings/string_util.h" 10 #include "extensions/common/matcher/substring_set_matcher.h" 11 #include "third_party/re2/re2/filtered_re2.h" 12 #include "third_party/re2/re2/re2.h" 13 14 namespace extensions { 15 16 RegexSetMatcher::RegexSetMatcher() {} 17 18 RegexSetMatcher::~RegexSetMatcher() { 19 DeleteSubstringPatterns(); 20 } 21 22 void RegexSetMatcher::AddPatterns( 23 const std::vector<const StringPattern*>& regex_list) { 24 if (regex_list.empty()) 25 return; 26 for (size_t i = 0; i < regex_list.size(); ++i) { 27 regexes_[regex_list[i]->id()] = regex_list[i]; 28 } 29 30 RebuildMatcher(); 31 } 32 33 void RegexSetMatcher::ClearPatterns() { 34 regexes_.clear(); 35 RebuildMatcher(); 36 } 37 38 bool RegexSetMatcher::Match(const std::string& text, 39 std::set<StringPattern::ID>* matches) const { 40 size_t old_number_of_matches = matches->size(); 41 if (regexes_.empty()) 42 return false; 43 if (!filtered_re2_.get()) { 44 LOG(ERROR) << "RegexSetMatcher was not initialized"; 45 return false; 46 } 47 48 // FilteredRE2 expects lowercase for prefiltering, but we still 49 // match case-sensitively. 50 std::vector<RE2ID> atoms(FindSubstringMatches( 51 StringToLowerASCII(text))); 52 53 std::vector<RE2ID> re2_ids; 54 filtered_re2_->AllMatches(text, atoms, &re2_ids); 55 56 std::set<StringPattern::ID> matched_ids; 57 for (size_t i = 0; i < re2_ids.size(); ++i) { 58 StringPattern::ID id = re2_id_map_[re2_ids[i]]; 59 matches->insert(id); 60 } 61 return old_number_of_matches != matches->size(); 62 } 63 64 bool RegexSetMatcher::IsEmpty() const { 65 return regexes_.empty(); 66 } 67 68 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( 69 const std::string& text) const { 70 std::set<int> atoms_set; 71 substring_matcher_->Match(text, &atoms_set); 72 return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end()); 73 } 74 75 void RegexSetMatcher::RebuildMatcher() { 76 re2_id_map_.clear(); 77 filtered_re2_.reset(new re2::FilteredRE2()); 78 if (regexes_.empty()) 79 return; 80 81 for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { 82 RE2ID re2_id; 83 RE2::ErrorCode error = filtered_re2_->Add( 84 it->second->pattern(), RE2::DefaultOptions, &re2_id); 85 if (error == RE2::NoError) { 86 DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); 87 re2_id_map_.push_back(it->first); 88 } else { 89 // Unparseable regexes should have been rejected already in 90 // URLMatcherFactory::CreateURLMatchesCondition. 91 LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " 92 << it->second->pattern() << ")"; 93 } 94 } 95 96 std::vector<std::string> strings_to_match; 97 filtered_re2_->Compile(&strings_to_match); 98 99 substring_matcher_.reset(new SubstringSetMatcher); 100 DeleteSubstringPatterns(); 101 // Build SubstringSetMatcher from |strings_to_match|. 102 // SubstringSetMatcher doesn't own its strings. 103 for (size_t i = 0; i < strings_to_match.size(); ++i) { 104 substring_patterns_.push_back( 105 new StringPattern(strings_to_match[i], i)); 106 } 107 substring_matcher_->RegisterPatterns(substring_patterns_); 108 } 109 110 void RegexSetMatcher::DeleteSubstringPatterns() { 111 STLDeleteElements(&substring_patterns_); 112 } 113 114 } // namespace extensions 115