Home | History | Annotate | Download | only in include
      1 // Copyright (c) 2005, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 //
     30 // Author: Sanjay Ghemawat
     31 // Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
     32 
     33 #ifndef _PCRECPP_H
     34 #define _PCRECPP_H
     35 
     36 // C++ interface to the pcre regular-expression library.  RE supports
     37 // Perl-style regular expressions (with extensions like \d, \w, \s,
     38 // ...).
     39 //
     40 // -----------------------------------------------------------------------
     41 // REGEXP SYNTAX:
     42 //
     43 // This module is part of the pcre library and hence supports its syntax
     44 // for regular expressions.
     45 //
     46 // The syntax is pretty similar to Perl's.  For those not familiar
     47 // with Perl's regular expressions, here are some examples of the most
     48 // commonly used extensions:
     49 //
     50 //   "hello (\\w+) world"  -- \w matches a "word" character
     51 //   "version (\\d+)"      -- \d matches a digit
     52 //   "hello\\s+world"      -- \s matches any whitespace character
     53 //   "\\b(\\w+)\\b"        -- \b matches empty string at a word boundary
     54 //   "(?i)hello"           -- (?i) turns on case-insensitive matching
     55 //   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
     56 //
     57 // -----------------------------------------------------------------------
     58 // MATCHING INTERFACE:
     59 //
     60 // The "FullMatch" operation checks that supplied text matches a
     61 // supplied pattern exactly.
     62 //
     63 // Example: successful match
     64 //    pcrecpp::RE re("h.*o");
     65 //    re.FullMatch("hello");
     66 //
     67 // Example: unsuccessful match (requires full match):
     68 //    pcrecpp::RE re("e");
     69 //    !re.FullMatch("hello");
     70 //
     71 // Example: creating a temporary RE object:
     72 //    pcrecpp::RE("h.*o").FullMatch("hello");
     73 //
     74 // You can pass in a "const char*" or a "string" for "text".  The
     75 // examples below tend to use a const char*.
     76 //
     77 // You can, as in the different examples above, store the RE object
     78 // explicitly in a variable or use a temporary RE object.  The
     79 // examples below use one mode or the other arbitrarily.  Either
     80 // could correctly be used for any of these examples.
     81 //
     82 // -----------------------------------------------------------------------
     83 // MATCHING WITH SUB-STRING EXTRACTION:
     84 //
     85 // You can supply extra pointer arguments to extract matched subpieces.
     86 //
     87 // Example: extracts "ruby" into "s" and 1234 into "i"
     88 //    int i;
     89 //    string s;
     90 //    pcrecpp::RE re("(\\w+):(\\d+)");
     91 //    re.FullMatch("ruby:1234", &s, &i);
     92 //
     93 // Example: does not try to extract any extra sub-patterns
     94 //    re.FullMatch("ruby:1234", &s);
     95 //
     96 // Example: does not try to extract into NULL
     97 //    re.FullMatch("ruby:1234", NULL, &i);
     98 //
     99 // Example: integer overflow causes failure
    100 //    !re.FullMatch("ruby:1234567891234", NULL, &i);
    101 //
    102 // Example: fails because there aren't enough sub-patterns:
    103 //    !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
    104 //
    105 // Example: fails because string cannot be stored in integer
    106 //    !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
    107 //
    108 // The provided pointer arguments can be pointers to any scalar numeric
    109 // type, or one of
    110 //    string        (matched piece is copied to string)
    111 //    StringPiece   (StringPiece is mutated to point to matched piece)
    112 //    T             (where "bool T::ParseFrom(const char*, int)" exists)
    113 //    NULL          (the corresponding matched sub-pattern is not copied)
    114 //
// CAVEAT: An optional sub-pattern that does not exist in the matched
// string is assigned the empty string.  Therefore, the following will
// return false (because the empty string is not a valid number):
//    int number;
//    pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
    120 //
    121 // -----------------------------------------------------------------------
    122 // DO_MATCH
    123 //
    124 // The matching interface supports at most 16 arguments per call.
    125 // If you need more, consider using the more general interface
    126 // pcrecpp::RE::DoMatch().  See pcrecpp.h for the signature for DoMatch.
    127 //
    128 // -----------------------------------------------------------------------
    129 // PARTIAL MATCHES
    130 //
    131 // You can use the "PartialMatch" operation when you want the pattern
    132 // to match any substring of the text.
    133 //
    134 // Example: simple search for a string:
    135 //    pcrecpp::RE("ell").PartialMatch("hello");
    136 //
    137 // Example: find first number in a string:
    138 //    int number;
    139 //    pcrecpp::RE re("(\\d+)");
    140 //    re.PartialMatch("x*100 + 20", &number);
    141 //    assert(number == 100);
    142 //
    143 // -----------------------------------------------------------------------
    144 // UTF-8 AND THE MATCHING INTERFACE:
    145 //
    146 // By default, pattern and text are plain text, one byte per character.
    147 // The UTF8 flag, passed to the constructor, causes both pattern
    148 // and string to be treated as UTF-8 text, still a byte stream but
    149 // potentially multiple bytes per character. In practice, the text
    150 // is likelier to be UTF-8 than the pattern, but the match returned
    151 // may depend on the UTF8 flag, so always use it when matching
    152 // UTF8 text.  E.g., "." will match one byte normally but with UTF8
    153 // set may match up to three bytes of a multi-byte character.
    154 //
// Example:
//    pcrecpp::RE_Options options;
//    options.set_utf(true);
//    pcrecpp::RE re(utf8_pattern, options);
//    re.FullMatch(utf8_string);
//
// Example: using the convenience function UTF():
//    pcrecpp::RE re(utf8_pattern, pcrecpp::UTF());
//    re.FullMatch(utf8_string);
    163 //    re.FullMatch(utf8_string);
    164 //
    165 // NOTE: The UTF8 option is ignored if pcre was not configured with the
    166 //       --enable-utf8 flag.
    167 //
    168 // -----------------------------------------------------------------------
    169 // PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
    170 //
    171 // PCRE defines some modifiers to change the behavior of the regular
    172 // expression engine.
    173 // The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
    174 // to pass such modifiers to a RE class.
    175 //
    176 // Currently, the following modifiers are supported
    177 //
    178 //    modifier              description               Perl corresponding
    179 //
    180 //    PCRE_CASELESS         case insensitive match    /i
    181 //    PCRE_MULTILINE        multiple lines match      /m
    182 //    PCRE_DOTALL           dot matches newlines      /s
    183 //    PCRE_DOLLAR_ENDONLY   $ matches only at end     N/A
    184 //    PCRE_EXTRA            strict escape parsing     N/A
    185 //    PCRE_EXTENDED         ignore whitespaces        /x
    186 //    PCRE_UTF8             handles UTF8 chars        built-in
    187 //    PCRE_UNGREEDY         reverses * and *?         N/A
    188 //    PCRE_NO_AUTO_CAPTURE  disables matching parens  N/A (*)
    189 //
    190 // (For a full account on how each modifier works, please check the
    191 // PCRE API reference manual).
    192 //
    193 // (*) Both Perl and PCRE allow non matching parentheses by means of the
    194 // "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
    195 // capture, while (ab|cd) does.
    196 //
    197 // For each modifier, there are two member functions whose name is made
    198 // out of the modifier in lowercase, without the "PCRE_" prefix. For
    199 // instance, PCRE_CASELESS is handled by
    200 //    bool caseless(),
    201 // which returns true if the modifier is set, and
    202 //    RE_Options & set_caseless(bool),
    203 // which sets or unsets the modifier.
    204 //
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
// set_match_limit() and match_limit() member functions.
// Setting match_limit to a non-zero value will limit the execution of
// pcre to keep it from doing bad things like blowing the stack or taking
// an eternity to return a result.  A value of 5000 is good enough to stop
// stack blowup in a 2MB thread stack.  Setting match_limit to zero will
// disable match limiting.  Alternatively, you can set match_limit_recursion()
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
// recurses.  match_limit() caps the number of matches pcre does;
// match_limit_recursion() caps the depth of recursion.
    215 //
// Normally, to pass one or more modifiers to a RE class, you declare
// a RE_Options object, set the appropriate options, and pass this
// object to a RE constructor. Example:
//
//    RE_Options opt;
//    opt.set_caseless(true);
//
//    if (RE("HELLO", opt).PartialMatch("hello world")) ...
//
// RE_Options has two constructors. The default constructor takes no
// arguments and creates a set of flags that are off by default.
    227 //
    228 // The optional parameter 'option_flags' is to facilitate transfer
    229 // of legacy code from C programs.  This lets you do
    230 //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
    231 //
    232 // But new code is better off doing
    233 //    RE(pattern,
    234 //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
    235 // (See below)
    236 //
// If you are going to pass one of the most used modifiers, there are some
// convenience functions that return a RE_Options class with the
// appropriate modifier already set:
// CASELESS(), UTF(), MULTILINE(), DOTALL(), EXTENDED()
    241 //
// If you need to set several options at once, and you don't want to go
// through the pains of declaring a RE_Options object and setting several
// options, there is a parallel method that gives you such ability on the
// fly. You can concatenate several set_xxxxx member functions, since each
// of them returns a reference to its class object.  e.g.: to pass
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
// statement, you may write
    249 //
    250 //    RE(" ^ xyz \\s+ .* blah$", RE_Options()
    251 //                            .set_caseless(true)
    252 //                            .set_extended(true)
    253 //                            .set_multiline(true)).PartialMatch(sometext);
    254 //
    255 // -----------------------------------------------------------------------
    256 // SCANNING TEXT INCREMENTALLY
    257 //
    258 // The "Consume" operation may be useful if you want to repeatedly
    259 // match regular expressions at the front of a string and skip over
    260 // them as they match.  This requires use of the "StringPiece" type,
    261 // which represents a sub-range of a real string.  Like RE, StringPiece
    262 // is defined in the pcrecpp namespace.
    263 //
    264 // Example: read lines of the form "var = value" from a string.
    265 //    string contents = ...;                 // Fill string somehow
    266 //    pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
    267 //
    268 //    string var;
    269 //    int value;
    270 //    pcrecpp::RE re("(\\w+) = (\\d+)\n");
    271 //    while (re.Consume(&input, &var, &value)) {
    272 //      ...;
    273 //    }
    274 //
    275 // Each successful call to "Consume" will set "var/value", and also
    276 // advance "input" so it points past the matched text.
    277 //
    278 // The "FindAndConsume" operation is similar to "Consume" but does not
    279 // anchor your match at the beginning of the string.  For example, you
    280 // could extract all words from a string by repeatedly calling
    281 //     pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
    282 //
    283 // -----------------------------------------------------------------------
    284 // PARSING HEX/OCTAL/C-RADIX NUMBERS
    285 //
    286 // By default, if you pass a pointer to a numeric value, the
    287 // corresponding text is interpreted as a base-10 number.  You can
    288 // instead wrap the pointer with a call to one of the operators Hex(),
    289 // Octal(), or CRadix() to interpret the text in another base.  The
    290 // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
    291 // prefixes, but defaults to base-10.
    292 //
    293 // Example:
    294 //   int a, b, c, d;
    295 //   pcrecpp::RE re("(.*) (.*) (.*) (.*)");
    296 //   re.FullMatch("100 40 0100 0x40",
    297 //                pcrecpp::Octal(&a), pcrecpp::Hex(&b),
    298 //                pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
    299 // will leave 64 in a, b, c, and d.
    300 //
    301 // -----------------------------------------------------------------------
    302 // REPLACING PARTS OF STRINGS
    303 //
    304 // You can replace the first match of "pattern" in "str" with
    305 // "rewrite".  Within "rewrite", backslash-escaped digits (\1 to \9)
    306 // can be used to insert text matching corresponding parenthesized
    307 // group from the pattern.  \0 in "rewrite" refers to the entire
    308 // matching text.  E.g.,
    309 //
    310 //   string s = "yabba dabba doo";
    311 //   pcrecpp::RE("b+").Replace("d", &s);
    312 //
    313 // will leave "s" containing "yada dabba doo".  The result is true if
    314 // the pattern matches and a replacement occurs, or false otherwise.
    315 //
    316 // GlobalReplace() is like Replace(), except that it replaces all
    317 // occurrences of the pattern in the string with the rewrite.
    318 // Replacements are not subject to re-matching.  E.g.,
    319 //
    320 //   string s = "yabba dabba doo";
    321 //   pcrecpp::RE("b+").GlobalReplace("d", &s);
    322 //
    323 // will leave "s" containing "yada dada doo".  It returns the number
    324 // of replacements made.
    325 //
    326 // Extract() is like Replace(), except that if the pattern matches,
    327 // "rewrite" is copied into "out" (an additional argument) with
    328 // substitutions.  The non-matching portions of "text" are ignored.
    329 // Returns true iff a match occurred and the extraction happened
    330 // successfully.  If no match occurs, the string is left unaffected.
    331 
    332 
    333 #include <string>
    334 #include <pcre2.h>
    335 #include <pcrecpparg.h>   // defines the Arg class
    336 // This isn't technically needed here, but we include it
    337 // anyway so folks who include pcrecpp.h don't have to.
    338 #include <pcre_stringpiece.h>
    339 #include <memory>
    340 
    341 namespace pcrecpp {
    342 
    343 #define PCRE_SET_OR_CLEAR(b, o) \
    344     if (b) all_options_ |= (o); else all_options_ &= ~(o); \
    345     return *this
    346 
    347 #define PCRE_IS_SET(o)  \
    348         (all_options_ & o) == o
    349 
    350 typedef std::shared_ptr<pcre2_match_data> pcre2_match_data_ptr;
    351 
    352 /***** Compiling regular expressions: the RE class *****/
    353 
    354 // RE_Options allow you to set options to be passed along to pcre,
    355 // along with other options we put on top of pcre.
    356 // Only 9 modifiers, plus match_limit and match_limit_recursion,
    357 // are supported now.
    358 class RE_Options {
    359  public:
    360   // constructor
    361   RE_Options()
    362       : newline_mode_(0),
    363         match_limit_(0),
    364         match_limit_recursion_(0),
    365         all_options_(0) {
    366   }
    367 
    368   // alternative constructor.
    369   // To facilitate transfer of legacy code from C programs
    370   //
    371   // This lets you do
    372   //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
    373   // But new code is better off doing
    374   //    RE(pattern,
    375   //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
    376   RE_Options(int option_flags)
    377       : newline_mode_(0),
    378         match_limit_(0),
    379         match_limit_recursion_(0),
    380         all_options_(option_flags) {
    381   }
    382   // we're fine with the default destructor, copy constructor, etc.
    383 
    384   // accessors and mutators
    385   int match_limit() const { return match_limit_; };
    386   RE_Options &set_match_limit(int limit) {
    387     match_limit_ = limit;
    388     return *this;
    389   }
    390 
    391   int match_limit_recursion() const { return match_limit_recursion_; };
    392   RE_Options &set_match_limit_recursion(int limit) {
    393     match_limit_recursion_ = limit;
    394     return *this;
    395   }
    396 
    397   bool caseless() const {
    398     return PCRE_IS_SET(PCRE2_CASELESS);
    399   }
    400   RE_Options &set_caseless(bool x) {
    401     PCRE_SET_OR_CLEAR(x, PCRE2_CASELESS);
    402   }
    403 
    404   bool multiline() const {
    405     return PCRE_IS_SET(PCRE2_MULTILINE);
    406   }
    407   RE_Options &set_multiline(bool x) {
    408     PCRE_SET_OR_CLEAR(x, PCRE2_MULTILINE);
    409   }
    410 
    411   int newline_mode() const {
    412     if(newline_mode_)
    413       return newline_mode_;
    414     else {
    415       // if newline_mode_ is 0 return the global configuration default
    416       int value;
    417       pcre2_config_8(PCRE2_CONFIG_NEWLINE, &value);
    418       return value;
    419     }
    420   }
    421   RE_Options & set_newline_mode(int newline_mode) {
    422     newline_mode_ = newline_mode;
    423     return *this;
    424   }
    425 
    426   bool dotall() const {
    427     return PCRE_IS_SET(PCRE2_DOTALL);
    428   }
    429   RE_Options &set_dotall(bool x) {
    430     PCRE_SET_OR_CLEAR(x, PCRE2_DOTALL);
    431   }
    432 
    433   bool extended() const {
    434     return PCRE_IS_SET(PCRE2_EXTENDED);
    435   }
    436   RE_Options &set_extended(bool x) {
    437     PCRE_SET_OR_CLEAR(x, PCRE2_EXTENDED);
    438   }
    439 
    440   bool dollar_endonly() const {
    441     return PCRE_IS_SET(PCRE2_DOLLAR_ENDONLY);
    442   }
    443   RE_Options &set_dollar_endonly(bool x) {
    444     PCRE_SET_OR_CLEAR(x, PCRE2_DOLLAR_ENDONLY);
    445   }
    446 
    447   bool ungreedy() const {
    448     return PCRE_IS_SET(PCRE2_UNGREEDY);
    449   }
    450   RE_Options &set_ungreedy(bool x) {
    451     PCRE_SET_OR_CLEAR(x, PCRE2_UNGREEDY);
    452   }
    453 
    454   bool utf() const {
    455     return PCRE_IS_SET(PCRE2_UTF);
    456   }
    457   RE_Options &set_utf(bool x) {
    458     PCRE_SET_OR_CLEAR(x, PCRE2_UTF);
    459   }
    460 
    461   bool no_auto_capture() const {
    462     return PCRE_IS_SET(PCRE2_NO_AUTO_CAPTURE);
    463   }
    464   RE_Options &set_no_auto_capture(bool x) {
    465     PCRE_SET_OR_CLEAR(x, PCRE2_NO_AUTO_CAPTURE);
    466   }
    467 
    468   RE_Options &set_all_options(int opt) {
    469     all_options_ = opt;
    470     return *this;
    471   }
    472   int all_options() const {
    473     return all_options_ ;
    474   }
    475 
    476   // TODO: add other pcre flags
    477 
    478  private:
    479   int newline_mode_;
    480   int match_limit_;
    481   int match_limit_recursion_;
    482   int all_options_;
    483 };
    484 
    485 // These functions return some common RE_Options
    486 static inline RE_Options UTF() {
    487   return RE_Options().set_utf(true);
    488 }
    489 
    490 static inline RE_Options CASELESS() {
    491   return RE_Options().set_caseless(true);
    492 }
    493 static inline RE_Options MULTILINE() {
    494   return RE_Options().set_multiline(true);
    495 }
    496 
    497 static inline RE_Options DOTALL() {
    498   return RE_Options().set_dotall(true);
    499 }
    500 
    501 static inline RE_Options EXTENDED() {
    502   return RE_Options().set_extended(true);
    503 }
    504 
    505 // Interface for regular expression matching.  Also corresponds to a
    506 // pre-compiled regular expression.  An "RE" object is safe for
    507 // concurrent use by multiple threads.
    508 class RE {
    509  public:
    510   // We provide implicit conversions from strings so that users can
    511   // pass in a string or a "const char*" wherever an "RE" is expected.
    512   RE(const string& pat) { Init(pat, NULL); }
    513   RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
    514   RE(const char* pat) { Init(pat, NULL); }
    515   RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
    516   RE(const unsigned char* pat) {
    517     Init(reinterpret_cast<const char*>(pat), NULL);
    518   }
    519   RE(const unsigned char* pat, const RE_Options& option) {
    520     Init(reinterpret_cast<const char*>(pat), &option);
    521   }
    522 
    523   // Copy constructor & assignment - note that these are expensive
    524   // because they recompile the expression.
    525   RE(const RE& re) { Init(re.pattern_, &re.options_); }
    526   const RE& operator=(const RE& re) {
    527     if (this != &re) {
    528       Cleanup();
    529 
    530       // This is the code that originally came from Google
    531       // Init(re.pattern_.c_str(), &re.options_);
    532 
    533       // This is the replacement from Ari Pollak
    534       Init(re.pattern_, &re.options_);
    535     }
    536     return *this;
    537   }
    538 
    539 
    540   ~RE();
    541 
    542   // The string specification for this RE.  E.g.
    543   //   RE re("ab*c?d+");
    544   //   re.pattern();    // "ab*c?d+"
    545   const string& pattern() const { return pattern_; }
    546 
    547   // If RE could not be created properly, returns an error string.
    548   // Else returns the empty string.
    549   const string& error() const { return error_; }
    550 
    551   /***** The useful part: the matching interface *****/
    552 
    553   // This is provided so one can do pattern.ReplaceAll() just as
    554   // easily as ReplaceAll(pattern-text, ....)
    555 
    556   template<typename ... ARGS>
    557   bool FullMatch(const StringPiece & text, ARGS && ...a) const {
    558     // create an array with the size of the number of arguments given
    559     Arg args[Args<ARGS...>::count()];
    560     // initialize the array with the arguments given
    561     Args<ARGS...>::arrayify(args, a...);
    562 
    563     return DoMatchImpl(text, ANCHOR_BOTH, NULL, args, Args<ARGS...>::count());
    564   }
    565 
    566   template<typename ... ARGS>
    567   bool PartialMatch(const StringPiece& text, ARGS && ...a) const {
    568     // create an array with the size of the number of arguments given
    569     Arg args[Args<ARGS...>::count()];
    570     // initialize the array with the arguments given
    571     Args<ARGS...>::arrayify(args, a...);
    572 
    573     return DoMatchImpl(text, UNANCHORED, NULL, args, Args<ARGS...>::count());
    574   }
    575 
    576   template<typename ... ARGS>
    577   bool Consume(StringPiece* input, ARGS && ...a) const {
    578     // create an array with the size of the number of arguments given
    579     Arg args[Args<ARGS...>::count()];
    580     // initialize the array with the arguments given
    581     Args<ARGS...>::arrayify(args, a...);
    582 
    583     int consumed;
    584     if (DoMatchImpl(*input, ANCHOR_START, &consumed, args,
    585                     Args<ARGS...>::count())) {
    586       input->remove_prefix(consumed);
    587       return true;
    588     } else {
    589       return false;
    590     }
    591   }
    592 
    593   template<typename ... ARGS>
    594   bool FindAndConsume(StringPiece* input, ARGS && ...a) const {
    595     Arg args[Args<ARGS...>::count()];
    596     Args<ARGS...>::arrayify(args, a...);
    597     int consumed;
    598     if (DoMatchImpl(*input, UNANCHORED, &consumed, args,
    599                     Args<ARGS...>::count())) {
    600       input->remove_prefix(consumed);
    601       return true;
    602     } else {
    603       return false;
    604     }
    605   }
    606 
    607   bool Replace(const StringPiece& rewrite,
    608                string *str) const;
    609 
    610   int GlobalReplace(const StringPiece& rewrite,
    611                     string *str) const;
    612 
    613   bool Extract(const StringPiece &rewrite,
    614                const StringPiece &text,
    615                string *out) const;
    616 
    617   // Escapes all potentially meaningful regexp characters in
    618   // 'unquoted'.  The returned string, used as a regular expression,
    619   // will exactly match the original string.  For example,
    620   //           1.5-2.0?
    621   // may become:
    622   //           1\.5\-2\.0\?
    623   // Note QuoteMeta behaves the same as perl's QuoteMeta function,
    624   // *except* that it escapes the NUL character (\0) as backslash + 0,
    625   // rather than backslash + NUL.
    626   static string QuoteMeta(const StringPiece& unquoted);
    627 
    628 
    629   /***** Generic matching interface *****/
    630 
    631   // Type of match (TODO: Should be restructured as part of RE_Options)
    632   enum Anchor {
    633     UNANCHORED,         // No anchoring
    634     ANCHOR_START,       // Anchor at start only
    635     ANCHOR_BOTH         // Anchor at start and end
    636   };
    637 
    638   // General matching routine.  Stores the length of the match in
    639   // "*consumed" if successful.
    640   bool DoMatch(const StringPiece& text,
    641                Anchor anchor,
    642                int* consumed,
    643                Arg const argsp[], int n) const;
    644 
    645   // Return the number of capturing subpatterns, or -1 if the
    646   // regexp wasn't valid on construction.
    647   int NumberOfCapturingGroups() const;
    648 
    649  private:
    650 
    651   void Init(const string& pattern, const RE_Options* options);
    652   void Cleanup();
    653 
    654   // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
    655   // pairs of integers for the beginning and end positions of matched
    656   // text.  The first pair corresponds to the entire matched text;
    657   // subsequent pairs correspond, in order, to parentheses-captured
    658   // matches.  Returns the number of pairs (one more than the number of
    659   // the last subpattern with a match) if matching was successful
    660   // and zero if the match failed.
    661   // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
    662   // against "foo", "bar", and "baz" respectively.
    663   // When matching RE("(foo)|hello") against "hello", it will return 1.
    664   // But the values for all subpattern are filled in into "vec".
    665   int TryMatch(const StringPiece& text,
    666                int startpos,
    667                Anchor anchor,
    668                bool empty_ok,
    669                pcre2_match_data_ptr & match_data) const;
    670 
    671   // Append the "rewrite" string, with backslash subsitutions from "text"
    672   // and "vec", to string "out".
    673   bool Rewrite(string *out,
    674                const StringPiece& rewrite,
    675                const StringPiece& text,
    676                pcre2_match_data_ptr const & match_data) const;
    677 
    678   // internal implementation for DoMatch
    679   bool DoMatchImpl(const StringPiece& text,
    680                    Anchor anchor,
    681                    int* consumed,
    682                    const Arg args[],
    683                    int n) const;
    684 
    685   // Compile the regexp for the specified anchoring mode
    686   pcre2_code * Compile(Anchor anchor);
    687 
    688   string        pattern_;
    689   RE_Options    options_;
    690   pcre2_code*   re_full_;       // For full matches
    691   pcre2_code*   re_partial_;    // For partial matches
    692   string        error_;         // Error indicator
    693 };
    694 
    695 }   // namespace pcrecpp
    696 
    697 #endif /* _PCRECPP_H */
    698