1 // Copyright 2010 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include "re2/set.h" 6 7 #include "util/util.h" 8 #include "re2/stringpiece.h" 9 #include "re2/prog.h" 10 #include "re2/re2.h" 11 #include "re2/regexp.h" 12 13 using namespace re2; 14 15 RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { 16 options_.Copy(options); 17 anchor_ = anchor; 18 prog_ = NULL; 19 compiled_ = false; 20 } 21 22 RE2::Set::~Set() { 23 for (int i = 0; i < re_.size(); i++) 24 re_[i]->Decref(); 25 delete prog_; 26 } 27 28 int RE2::Set::Add(const StringPiece& pattern, string* error) { 29 if (compiled_) { 30 LOG(DFATAL) << "RE2::Set::Add after Compile"; 31 return -1; 32 } 33 34 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( 35 options_.ParseFlags()); 36 37 RegexpStatus status; 38 re2::Regexp* re = Regexp::Parse(pattern, pf, &status); 39 if (re == NULL) { 40 if (error != NULL) 41 *error = status.Text(); 42 if (options_.log_errors()) 43 LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); 44 return -1; 45 } 46 47 // Concatenate with match index and push on vector. 48 int n = re_.size(); 49 re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); 50 if (re->op() == kRegexpConcat) { 51 int nsub = re->nsub(); 52 re2::Regexp** sub = new re2::Regexp*[nsub + 1]; 53 for (int i = 0; i < nsub; i++) 54 sub[i] = re->sub()[i]->Incref(); 55 sub[nsub] = m; 56 re->Decref(); 57 re = re2::Regexp::Concat(sub, nsub + 1, pf); 58 delete[] sub; 59 } else { 60 re2::Regexp* sub[2]; 61 sub[0] = re; 62 sub[1] = m; 63 re = re2::Regexp::Concat(sub, 2, pf); 64 } 65 re_.push_back(re); 66 return n; 67 } 68 69 bool RE2::Set::Compile() { 70 if (compiled_) { 71 LOG(DFATAL) << "RE2::Set::Compile multiple times"; 72 return false; 73 } 74 compiled_ = true; 75 76 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( 77 options_.ParseFlags()); 78 re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]), 79 re_.size(), pf); 80 re_.clear(); 81 re2::Regexp* sre = re->Simplify(); 82 re->Decref(); 83 re = sre; 84 if (re == NULL) { 85 if (options_.log_errors()) 86 LOG(ERROR) << "Error simplifying during Compile."; 87 return false; 88 } 89 90 prog_ = Prog::CompileSet(options_, anchor_, re); 91 return prog_ != NULL; 92 } 93 94 bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const { 95 if (!compiled_) { 96 LOG(DFATAL) << "RE2::Set::Match without Compile"; 97 return false; 98 } 99 v->clear(); 100 bool failed; 101 bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, 102 Prog::kManyMatch, NULL, &failed, v); 103 if (failed) 104 LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space"; 105 106 if (ret == false) 107 return false; 108 if (v->size() == 0) { 109 LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set"; 110 return false; 111 } 112 return true; 113 } 114