Home | History | Annotate | Download | only in re2
      1 // Copyright 2010 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "re2/set.h"
      6 
      7 #include "util/util.h"
      8 #include "re2/stringpiece.h"
      9 #include "re2/prog.h"
     10 #include "re2/re2.h"
     11 #include "re2/regexp.h"
     12 
     13 using namespace re2;
     14 
     15 RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
     16   options_.Copy(options);
     17   anchor_ = anchor;
     18   prog_ = NULL;
     19   compiled_ = false;
     20 }
     21 
     22 RE2::Set::~Set() {
     23   for (int i = 0; i < re_.size(); i++)
     24     re_[i]->Decref();
     25   delete prog_;
     26 }
     27 
     28 int RE2::Set::Add(const StringPiece& pattern, string* error) {
     29   if (compiled_) {
     30     LOG(DFATAL) << "RE2::Set::Add after Compile";
     31     return -1;
     32   }
     33 
     34   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
     35     options_.ParseFlags());
     36 
     37   RegexpStatus status;
     38   re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
     39   if (re == NULL) {
     40     if (error != NULL)
     41       *error = status.Text();
     42     if (options_.log_errors())
     43       LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
     44     return -1;
     45   }
     46 
     47   // Concatenate with match index and push on vector.
     48   int n = re_.size();
     49   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
     50   if (re->op() == kRegexpConcat) {
     51     int nsub = re->nsub();
     52     re2::Regexp** sub = new re2::Regexp*[nsub + 1];
     53     for (int i = 0; i < nsub; i++)
     54       sub[i] = re->sub()[i]->Incref();
     55     sub[nsub] = m;
     56     re->Decref();
     57     re = re2::Regexp::Concat(sub, nsub + 1, pf);
     58     delete[] sub;
     59   } else {
     60     re2::Regexp* sub[2];
     61     sub[0] = re;
     62     sub[1] = m;
     63     re = re2::Regexp::Concat(sub, 2, pf);
     64   }
     65   re_.push_back(re);
     66   return n;
     67 }
     68 
     69 bool RE2::Set::Compile() {
     70   if (compiled_) {
     71     LOG(DFATAL) << "RE2::Set::Compile multiple times";
     72     return false;
     73   }
     74   compiled_ = true;
     75 
     76   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
     77     options_.ParseFlags());
     78   re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
     79                                            re_.size(), pf);
     80   re_.clear();
     81   re2::Regexp* sre = re->Simplify();
     82   re->Decref();
     83   re = sre;
     84   if (re == NULL) {
     85     if (options_.log_errors())
     86       LOG(ERROR) << "Error simplifying during Compile.";
     87     return false;
     88   }
     89 
     90   prog_ = Prog::CompileSet(options_, anchor_, re);
     91   return prog_ != NULL;
     92 }
     93 
     94 bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
     95   if (!compiled_) {
     96     LOG(DFATAL) << "RE2::Set::Match without Compile";
     97     return false;
     98   }
     99   v->clear();
    100   bool failed;
    101   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
    102                               Prog::kManyMatch, NULL, &failed, v);
    103   if (failed)
    104     LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
    105 
    106   if (ret == false)
    107     return false;
    108   if (v->size() == 0) {
    109     LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
    110     return false;
    111   }
    112   return true;
    113 }
    114