1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "components/url_matcher/regex_set_matcher.h" 6 7 #include "base/logging.h" 8 #include "base/stl_util.h" 9 #include "base/strings/string_util.h" 10 #include "components/url_matcher/substring_set_matcher.h" 11 #include "third_party/re2/re2/filtered_re2.h" 12 #include "third_party/re2/re2/re2.h" 13 14 namespace url_matcher { 15 16 RegexSetMatcher::RegexSetMatcher() {} 17 18 RegexSetMatcher::~RegexSetMatcher() { 19 DeleteSubstringPatterns(); 20 } 21 22 void RegexSetMatcher::AddPatterns( 23 const std::vector<const StringPattern*>& regex_list) { 24 if (regex_list.empty()) 25 return; 26 for (size_t i = 0; i < regex_list.size(); ++i) { 27 regexes_[regex_list[i]->id()] = regex_list[i]; 28 } 29 30 RebuildMatcher(); 31 } 32 33 void RegexSetMatcher::ClearPatterns() { 34 regexes_.clear(); 35 RebuildMatcher(); 36 } 37 38 bool RegexSetMatcher::Match(const std::string& text, 39 std::set<StringPattern::ID>* matches) const { 40 size_t old_number_of_matches = matches->size(); 41 if (regexes_.empty()) 42 return false; 43 if (!filtered_re2_.get()) { 44 LOG(ERROR) << "RegexSetMatcher was not initialized"; 45 return false; 46 } 47 48 // FilteredRE2 expects lowercase for prefiltering, but we still 49 // match case-sensitively. 50 std::vector<RE2ID> atoms(FindSubstringMatches( 51 StringToLowerASCII(text))); 52 53 std::vector<RE2ID> re2_ids; 54 filtered_re2_->AllMatches(text, atoms, &re2_ids); 55 56 for (size_t i = 0; i < re2_ids.size(); ++i) { 57 StringPattern::ID id = re2_id_map_[re2_ids[i]]; 58 matches->insert(id); 59 } 60 return old_number_of_matches != matches->size(); 61 } 62 63 bool RegexSetMatcher::IsEmpty() const { 64 return regexes_.empty(); 65 } 66 67 std::vector<RegexSetMatcher::RE2ID> RegexSetMatcher::FindSubstringMatches( 68 const std::string& text) const { 69 std::set<int> atoms_set; 70 substring_matcher_->Match(text, &atoms_set); 71 return std::vector<RE2ID>(atoms_set.begin(), atoms_set.end()); 72 } 73 74 void RegexSetMatcher::RebuildMatcher() { 75 re2_id_map_.clear(); 76 filtered_re2_.reset(new re2::FilteredRE2()); 77 if (regexes_.empty()) 78 return; 79 80 for (RegexMap::iterator it = regexes_.begin(); it != regexes_.end(); ++it) { 81 RE2ID re2_id; 82 RE2::ErrorCode error = filtered_re2_->Add( 83 it->second->pattern(), RE2::DefaultOptions, &re2_id); 84 if (error == RE2::NoError) { 85 DCHECK_EQ(static_cast<RE2ID>(re2_id_map_.size()), re2_id); 86 re2_id_map_.push_back(it->first); 87 } else { 88 // Unparseable regexes should have been rejected already in 89 // URLMatcherFactory::CreateURLMatchesCondition. 90 LOG(ERROR) << "Could not parse regex (id=" << it->first << ", " 91 << it->second->pattern() << ")"; 92 } 93 } 94 95 std::vector<std::string> strings_to_match; 96 filtered_re2_->Compile(&strings_to_match); 97 98 substring_matcher_.reset(new SubstringSetMatcher); 99 DeleteSubstringPatterns(); 100 // Build SubstringSetMatcher from |strings_to_match|. 101 // SubstringSetMatcher doesn't own its strings. 102 for (size_t i = 0; i < strings_to_match.size(); ++i) { 103 substring_patterns_.push_back( 104 new StringPattern(strings_to_match[i], i)); 105 } 106 substring_matcher_->RegisterPatterns(substring_patterns_); 107 } 108 109 void RegexSetMatcher::DeleteSubstringPatterns() { 110 STLDeleteElements(&substring_patterns_); 111 } 112 113 } // namespace url_matcher 114