Home | History | Annotate | Download | only in pcrecpp
      1 // -*- coding: utf-8 -*-
      2 //
      3 // Copyright (c) 2005 - 2010, Google Inc.
      4 // All rights reserved.
      5 //
      6 // Redistribution and use in source and binary forms, with or without
      7 // modification, are permitted provided that the following conditions are
      8 // met:
      9 //
     10 //     * Redistributions of source code must retain the above copyright
     11 // notice, this list of conditions and the following disclaimer.
     12 //     * Redistributions in binary form must reproduce the above
     13 // copyright notice, this list of conditions and the following disclaimer
     14 // in the documentation and/or other materials provided with the
     15 // distribution.
     16 //     * Neither the name of Google Inc. nor the names of its
     17 // contributors may be used to endorse or promote products derived from
     18 // this software without specific prior written permission.
     19 //
     20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 //
     32 // Author: Sanjay Ghemawat
     33 //
     34 // TODO: Test extractions for PartialMatch/Consume
     35 
     36 #ifdef HAVE_CONFIG_H
     37 #include "config.h"
     38 #endif
     39 
     40 #include <stdio.h>
     41 #include <string.h>      /* for memset and strcmp */
     42 #include <cassert>
     43 #include <vector>
     44 #include "pcrecpp.h"
     45 
     46 using pcrecpp::StringPiece;
     47 using pcrecpp::RE;
     48 using pcrecpp::RE_Options;
     49 using pcrecpp::Hex;
     50 using pcrecpp::Octal;
     51 using pcrecpp::CRadix;
     52 
// When true, the option tests additionally print the pattern, the subject
// string and the expected outcome for each case they run.
static bool VERBOSE_TEST  = false;
     54 
     55 // CHECK dies with a fatal error if condition is not true.  It is *not*
     56 // controlled by NDEBUG, so the check will be executed regardless of
     57 // compilation mode.  Therefore, it is safe to do things like:
     58 //    CHECK_EQ(fp->Write(x), 4)
     59 #define CHECK(condition) do {                           \
     60   if (!(condition)) {                                   \
     61     fprintf(stderr, "%s:%d: Check failed: %s\n",        \
     62             __FILE__, __LINE__, #condition);            \
     63     exit(1);                                            \
     64   }                                                     \
     65 } while (0)
     66 
     67 #define CHECK_EQ(a, b)   CHECK(a == b)
     68 
     69 static void Timing1(int num_iters) {
     70   // Same pattern lots of times
     71   RE pattern("ruby:\\d+");
     72   StringPiece p("ruby:1234");
     73   for (int j = num_iters; j > 0; j--) {
     74     CHECK(pattern.FullMatch(p));
     75   }
     76 }
     77 
     78 static void Timing2(int num_iters) {
     79   // Same pattern lots of times
     80   RE pattern("ruby:(\\d+)");
     81   int i;
     82   for (int j = num_iters; j > 0; j--) {
     83     CHECK(pattern.FullMatch("ruby:1234", &i));
     84     CHECK_EQ(i, 1234);
     85   }
     86 }
     87 
     88 static void Timing3(int num_iters) {
     89   string text_string;
     90   for (int j = num_iters; j > 0; j--) {
     91     text_string += "this is another line\n";
     92   }
     93 
     94   RE line_matcher(".*\n");
     95   string line;
     96   StringPiece text(text_string);
     97   int counter = 0;
     98   while (line_matcher.Consume(&text)) {
     99     counter++;
    100   }
    101   printf("Matched %d lines\n", counter);
    102 }
    103 
    104 #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
    105 
    106 static void LeakTest() {
    107   // Check for memory leaks
    108   unsigned long long initial_size = 0;
    109   for (int i = 0; i < 100000; i++) {
    110     if (i == 50000) {
    111       initial_size = VirtualProcessSize();
    112       printf("Size after 50000: %llu\n", initial_size);
    113     }
    114     char buf[100];  // definitely big enough
    115     sprintf(buf, "pat%09d", i);
    116     RE newre(buf);
    117   }
    118   uint64 final_size = VirtualProcessSize();
    119   printf("Size after 100000: %llu\n", final_size);
    120   const double growth = double(final_size - initial_size) / final_size;
    121   printf("Growth: %0.2f%%", growth * 100);
    122   CHECK(growth < 0.02);       // Allow < 2% growth
    123 }
    124 
    125 #endif
    126 
// Exercises integer extraction in hex, octal and decimal for each integer
// width.  Every helper macro below stringizes its literal argument (suffix
// included) to produce the subject text, lets the trailing [uUlL]* in the
// pattern absorb the suffix, and compares the extracted value with the same
// literal as the compiler sees it.
static void RadixTests() {
  printf("Testing hex\n");

// CHECK_HEX(type, value): `value` is given without the 0x prefix.  It is
// first parsed through Hex(), then again through CRadix() with "0x"
// prepended to the subject; both results are compared with the pasted
// literal 0x<value>.
#define CHECK_HEX(type, value) \
  do { \
    type v; \
    CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
    CHECK_EQ(v, 0x ## value); \
    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
    CHECK_EQ(v, 0x ## value); \
  } while(0)

  CHECK_HEX(short,              2bad);
  CHECK_HEX(unsigned short,     2badU);
  CHECK_HEX(int,                dead);
  CHECK_HEX(unsigned int,       deadU);
  CHECK_HEX(long,               7eadbeefL);
  CHECK_HEX(unsigned long,      deadbeefUL);
#ifdef HAVE_LONG_LONG
  CHECK_HEX(long long,          12345678deadbeefLL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
#endif

#undef CHECK_HEX

  printf("Testing octal\n");

// CHECK_OCTAL(type, value): `value` is given without the leading 0.  It is
// first parsed through Octal(), then again through CRadix() with "0"
// prepended to the subject; both results are compared with the pasted
// literal 0<value>.
#define CHECK_OCTAL(type, value) \
  do { \
    type v; \
    CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
    CHECK_EQ(v, 0 ## value); \
    CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
    CHECK_EQ(v, 0 ## value); \
  } while(0)

  CHECK_OCTAL(short,              77777);
  CHECK_OCTAL(unsigned short,     177777U);
  CHECK_OCTAL(int,                17777777777);
  CHECK_OCTAL(unsigned int,       37777777777U);
  CHECK_OCTAL(long,               17777777777L);
  CHECK_OCTAL(unsigned long,      37777777777UL);
#ifdef HAVE_LONG_LONG
  CHECK_OCTAL(long long,          777777777777777777777LL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
#endif

#undef CHECK_OCTAL

  printf("Testing decimal\n");

// CHECK_DECIMAL(type, value): the literal is parsed once into a plain
// pointer argument and once through CRadix() with no prefix added; both
// results are compared with `value` itself.
#define CHECK_DECIMAL(type, value) \
  do { \
    type v; \
    CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
    CHECK_EQ(v, value); \
    CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
    CHECK_EQ(v, value); \
  } while(0)

  CHECK_DECIMAL(short,              -1);
  CHECK_DECIMAL(unsigned short,     9999);
  CHECK_DECIMAL(int,                -1000);
  CHECK_DECIMAL(unsigned int,       12345U);
  CHECK_DECIMAL(long,               -10000000L);
  CHECK_DECIMAL(unsigned long,      3083324652U);
#ifdef HAVE_LONG_LONG
  CHECK_DECIMAL(long long,          -100000000000000LL);
#endif
#ifdef HAVE_UNSIGNED_LONG_LONG
  CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
#endif

#undef CHECK_DECIMAL

}
    207 
    208 static void TestReplace() {
    209   printf("Testing Replace\n");
    210 
    211   struct ReplaceTest {
    212     const char *regexp;
    213     const char *rewrite;
    214     const char *original;
    215     const char *single;
    216     const char *global;
    217     int global_count;         // the expected return value from ReplaceAll
    218   };
    219   static const ReplaceTest tests[] = {
    220     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
    221       "\\2\\1ay",
    222       "the quick brown fox jumps over the lazy dogs.",
    223       "ethay quick brown fox jumps over the lazy dogs.",
    224       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
    225       9 },
    226     { "\\w+",
    227       "\\0-NOSPAM",
    228       "paul.haahr (at) google.com",
    229       "paul-NOSPAM.haahr (at) google.com",
    230       "paul-NOSPAM.haahr-NOSPAM (at) google-NOSPAM.com-NOSPAM",
    231       4 },
    232     { "^",
    233       "(START)",
    234       "foo",
    235       "(START)foo",
    236       "(START)foo",
    237       1 },
    238     { "^",
    239       "(START)",
    240       "",
    241       "(START)",
    242       "(START)",
    243       1 },
    244     { "$",
    245       "(END)",
    246       "",
    247       "(END)",
    248       "(END)",
    249       1 },
    250     { "b",
    251       "bb",
    252       "ababababab",
    253       "abbabababab",
    254       "abbabbabbabbabb",
    255        5 },
    256     { "b",
    257       "bb",
    258       "bbbbbb",
    259       "bbbbbbb",
    260       "bbbbbbbbbbbb",
    261       6 },
    262     { "b+",
    263       "bb",
    264       "bbbbbb",
    265       "bb",
    266       "bb",
    267       1 },
    268     { "b*",
    269       "bb",
    270       "bbbbbb",
    271       "bb",
    272       "bbbb",
    273       2 },
    274     { "b*",
    275       "bb",
    276       "aaaaa",
    277       "bbaaaaa",
    278       "bbabbabbabbabbabb",
    279       6 },
    280     { "b*",
    281       "bb",
    282       "aa\naa\n",
    283       "bbaa\naa\n",
    284       "bbabbabb\nbbabbabb\nbb",
    285       7 },
    286     { "b*",
    287       "bb",
    288       "aa\raa\r",
    289       "bbaa\raa\r",
    290       "bbabbabb\rbbabbabb\rbb",
    291       7 },
    292     { "b*",
    293       "bb",
    294       "aa\r\naa\r\n",
    295       "bbaa\r\naa\r\n",
    296       "bbabbabb\r\nbbabbabb\r\nbb",
    297       7 },
    298     // Check empty-string matching (it's tricky!)
    299     { "aa|b*",
    300       "@",
    301       "aa",
    302       "@",
    303       "@@",
    304       2 },
    305     { "b*|aa",
    306       "@",
    307       "aa",
    308       "@aa",
    309       "@@@",
    310       3 },
    311 #ifdef SUPPORT_UTF8
    312     { "b*",
    313       "bb",
    314       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
    315       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
    316       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
    317       5 },
    318     { "b*",
    319       "bb",
    320       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
    321       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
    322       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
    323        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
    324       9 },
    325 #endif
    326     { "", NULL, NULL, NULL, NULL, 0 }
    327   };
    328 
    329 #ifdef SUPPORT_UTF8
    330   const bool support_utf8 = true;
    331 #else
    332   const bool support_utf8 = false;
    333 #endif
    334 
    335   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
    336     RE re(t->regexp, RE_Options().set_newline_mode(PCRE2_NEWLINE_CRLF)
    337                                  .set_utf(support_utf8));
    338     assert(re.error().empty());
    339     string one(t->original);
    340     CHECK(re.Replace(t->rewrite, &one));
    341     CHECK_EQ(one, t->single);
    342     string all(t->original);
    343     const int replace_count = re.GlobalReplace(t->rewrite, &all);
    344     CHECK_EQ(all, t->global);
    345     CHECK_EQ(replace_count, t->global_count);
    346   }
    347 
    348   // One final test: test \r\n replacement when we're not in CRLF mode
    349   {
    350     RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_CR)
    351                             .set_utf(support_utf8));
    352     assert(re.error().empty());
    353     string all("aa\r\naa\r\n");
    354     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
    355     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
    356   }
    357   {
    358     RE re("b*", RE_Options().set_newline_mode(PCRE2_NEWLINE_LF)
    359                             .set_utf(support_utf8));
    360     assert(re.error().empty());
    361     string all("aa\r\naa\r\n");
    362     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
    363     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
    364   }
    365   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
    366   //       Alas, the answer depends on how pcre was compiled.
    367 }
    368 
    369 static void TestExtract() {
    370   printf("Testing Extract\n");
    371 
    372   string s;
    373 
    374   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris (at) kremvax.ru", &s));
    375   CHECK_EQ(s, "kremvax!boris");
    376 
    377   // check the RE interface as well
    378   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
    379   CHECK_EQ(s, "'foo'");
    380   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
    381   CHECK_EQ(s, "'foo'");
    382 }
    383 
    384 static void TestConsume() {
    385   printf("Testing Consume\n");
    386 
    387   string word;
    388 
    389   string s("   aaa b!@#$@#$cccc");
    390   StringPiece input(s);
    391 
    392   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
    393   CHECK(r.Consume(&input, &word));
    394   CHECK_EQ(word, "aaa");
    395   CHECK(r.Consume(&input, &word));
    396   CHECK_EQ(word, "b");
    397   CHECK(! r.Consume(&input, &word));
    398 }
    399 
    400 static void TestFindAndConsume() {
    401   printf("Testing FindAndConsume\n");
    402 
    403   string word;
    404 
    405   string s("   aaa b!@#$@#$cccc");
    406   StringPiece input(s);
    407 
    408   RE r("(\\w+)");      // matches a word
    409   CHECK(r.FindAndConsume(&input, &word));
    410   CHECK_EQ(word, "aaa");
    411   CHECK(r.FindAndConsume(&input, &word));
    412   CHECK_EQ(word, "b");
    413   CHECK(r.FindAndConsume(&input, &word));
    414   CHECK_EQ(word, "cccc");
    415   CHECK(! r.FindAndConsume(&input, &word));
    416 }
    417 
    418 static void TestMatchNumberPeculiarity() {
    419   printf("Testing match-number peculiarity\n");
    420 
    421   string word1;
    422   string word2;
    423   string word3;
    424 
    425   RE r("(foo)|(bar)|(baz)");
    426   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
    427   CHECK_EQ(word1, "foo");
    428   CHECK_EQ(word2, "");
    429   CHECK_EQ(word3, "");
    430   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
    431   CHECK_EQ(word1, "");
    432   CHECK_EQ(word2, "bar");
    433   CHECK_EQ(word3, "");
    434   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
    435   CHECK_EQ(word1, "");
    436   CHECK_EQ(word2, "");
    437   CHECK_EQ(word3, "baz");
    438   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
    439 
    440   string a;
    441   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
    442   CHECK_EQ(a, "");
    443 }
    444 
    445 static void TestRecursion() {
    446   printf("Testing recursion\n");
    447 
    448   // Get one string that passes (sometimes), one that never does.
    449   string text_good("abcdefghijk");
    450   string text_bad("acdefghijkl");
    451 
    452   // According to pcretest, matching text_good against (\w+)*b
    453   // requires match_limit of at least 8192, and match_recursion_limit
    454   // of at least 37.
    455 
    456   RE_Options options_ml;
    457   options_ml.set_match_limit(8192);
    458   RE re("(\\w+)*b", options_ml);
    459   CHECK(re.PartialMatch(text_good) == true);
    460   CHECK(re.PartialMatch(text_bad) == false);
    461   CHECK(re.FullMatch(text_good) == false);
    462   CHECK(re.FullMatch(text_bad) == false);
    463 
    464   options_ml.set_match_limit(1024);
    465   RE re2("(\\w+)*b", options_ml);
    466   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
    467   CHECK(re2.PartialMatch(text_bad) == false);
    468   CHECK(re2.FullMatch(text_good) == false);
    469   CHECK(re2.FullMatch(text_bad) == false);
    470 
    471   RE_Options options_mlr;
    472   options_mlr.set_match_limit_recursion(50);
    473   RE re3("(\\w+)*b", options_mlr);
    474   CHECK(re3.PartialMatch(text_good) == true);
    475   CHECK(re3.PartialMatch(text_bad) == false);
    476   CHECK(re3.FullMatch(text_good) == false);
    477   CHECK(re3.FullMatch(text_bad) == false);
    478 
    479   options_mlr.set_match_limit_recursion(10);
    480   RE re4("(\\w+)*b", options_mlr);
    481   CHECK(re4.PartialMatch(text_good) == false);
    482   CHECK(re4.PartialMatch(text_bad) == false);
    483   CHECK(re4.FullMatch(text_good) == false);
    484   CHECK(re4.FullMatch(text_bad) == false);
    485 }
    486 
    487 // A meta-quoted string, interpreted as a pattern, should always match
    488 // the original unquoted string.
    489 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
    490   string quoted = RE::QuoteMeta(unquoted);
    491   RE re(quoted, options);
    492   CHECK(re.FullMatch(unquoted));
    493 }
    494 
    495 // A string containing meaningful regexp characters, which is then meta-
    496 // quoted, should not generally match a string the unquoted string does.
    497 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
    498                                   RE_Options options = RE_Options()) {
    499   string quoted = RE::QuoteMeta(unquoted);
    500   RE re(quoted, options);
    501   CHECK(!re.FullMatch(should_not_match));
    502 }
    503 
    504 // Tests that quoted meta characters match their original strings,
    505 // and that a few things that shouldn't match indeed do not.
    506 static void TestQuotaMetaSimple() {
    507   TestQuoteMeta("foo");
    508   TestQuoteMeta("foo.bar");
    509   TestQuoteMeta("foo\\.bar");
    510   TestQuoteMeta("[1-9]");
    511   TestQuoteMeta("1.5-2.0?");
    512   TestQuoteMeta("\\d");
    513   TestQuoteMeta("Who doesn't like ice cream?");
    514   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
    515   TestQuoteMeta("((?!)xxx).*yyy");
    516   TestQuoteMeta("([");
    517   TestQuoteMeta(string("foo\0bar", 7));
    518 }
    519 
    520 static void TestQuoteMetaSimpleNegative() {
    521   NegativeTestQuoteMeta("foo", "bar");
    522   NegativeTestQuoteMeta("...", "bar");
    523   NegativeTestQuoteMeta("\\.", ".");
    524   NegativeTestQuoteMeta("\\.", "..");
    525   NegativeTestQuoteMeta("(a)", "a");
    526   NegativeTestQuoteMeta("(a|b)", "a");
    527   NegativeTestQuoteMeta("(a|b)", "(a)");
    528   NegativeTestQuoteMeta("(a|b)", "a|b");
    529   NegativeTestQuoteMeta("[0-9]", "0");
    530   NegativeTestQuoteMeta("[0-9]", "0-9");
    531   NegativeTestQuoteMeta("[0-9]", "[9]");
    532   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
    533 }
    534 
    535 static void TestQuoteMetaLatin1() {
    536   TestQuoteMeta("3\xb2 = 9");
    537 }
    538 
    539 static void TestQuoteMetaUtf8() {
    540 #ifdef SUPPORT_UTF8
    541   TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
    542   TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
    543   TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
    544   TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
    545   TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
    546   TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
    547   TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
    548   NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
    549                         "27\\\xc2\\\xb0",
    550                         pcrecpp::UTF8());
    551 #endif
    552 }
    553 
    554 static void TestQuoteMetaAll() {
    555   printf("Testing QuoteMeta\n");
    556   TestQuotaMetaSimple();
    557   TestQuoteMetaSimpleNegative();
    558   TestQuoteMetaLatin1();
    559   TestQuoteMetaUtf8();
    560 }
    561 
    562 //
    563 // Options tests contributed by
    564 // Giuseppe Maxia, CTO, Stardata s.r.l.
    565 // July 2005
    566 //
    567 static void GetOneOptionResult(
    568                 const char *option_name,
    569                 const char *regex,
    570                 const char *str,
    571                 RE_Options options,
    572                 bool full,
    573                 string expected) {
    574 
    575   printf("Testing Option <%s>\n", option_name);
    576   if(VERBOSE_TEST)
    577     printf("/%s/ finds \"%s\" within \"%s\" \n",
    578                     regex,
    579                     expected.c_str(),
    580                     str);
    581   string captured("");
    582   if (full)
    583     RE(regex,options).FullMatch(str, &captured);
    584   else
    585     RE(regex,options).PartialMatch(str, &captured);
    586   CHECK_EQ(captured, expected);
    587 }
    588 
    589 static void TestOneOption(
    590                 const char *option_name,
    591                 const char *regex,
    592                 const char *str,
    593                 RE_Options options,
    594                 bool full,
    595                 bool assertive = true) {
    596 
    597   printf("Testing Option <%s>\n", option_name);
    598   if (VERBOSE_TEST)
    599     printf("'%s' %s /%s/ \n",
    600                   str,
    601                   (assertive? "matches" : "doesn't match"),
    602                   regex);
    603   if (assertive) {
    604     if (full)
    605       CHECK(RE(regex,options).FullMatch(str));
    606     else
    607       CHECK(RE(regex,options).PartialMatch(str));
    608   } else {
    609     if (full)
    610       CHECK(!RE(regex,options).FullMatch(str));
    611     else
    612       CHECK(!RE(regex,options).PartialMatch(str));
    613   }
    614 }
    615 
    616 static void Test_CASELESS() {
    617   RE_Options options;
    618   RE_Options options2;
    619 
    620   options.set_caseless(true);
    621   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
    622   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
    623   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
    624 
    625   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
    626   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
    627   options.set_caseless(false);
    628   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
    629 }
    630 
    631 static void Test_MULTILINE() {
    632   RE_Options options;
    633   RE_Options options2;
    634   const char *str = "HELLO\n" "cruel\n" "world\n";
    635 
    636   options.set_multiline(true);
    637   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
    638   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
    639   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
    640   options.set_multiline(false);
    641   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
    642 }
    643 
    644 static void Test_DOTALL() {
    645   RE_Options options;
    646   RE_Options options2;
    647   const char *str = "HELLO\n" "cruel\n" "world";
    648 
    649   options.set_dotall(true);
    650   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
    651   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
    652   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
    653   options.set_dotall(false);
    654   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
    655 }
    656 
    657 static void Test_DOLLAR_ENDONLY() {
    658   RE_Options options;
    659   RE_Options options2;
    660   const char *str = "HELLO world\n";
    661 
    662   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
    663   options.set_dollar_endonly(true);
    664   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
    665   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
    666 }
    667 
    668 static void Test_EXTENDED() {
    669   RE_Options options;
    670   RE_Options options2;
    671   const char *str = "HELLO world";
    672 
    673   options.set_extended(true);
    674   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
    675   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
    676   TestOneOption("EXTENDED (class)",
    677                     "^ HE L{2} O "
    678                     "\\s+        "
    679                     "\\w+ $      ",
    680                     str,
    681                     options,
    682                     false);
    683 
    684   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
    685   TestOneOption("EXTENDED (function)",
    686                     "^ HE L{2} O "
    687                     "\\s+        "
    688                     "\\w+ $      ",
    689                     str,
    690                     pcrecpp::EXTENDED(),
    691                     false);
    692 
    693   options.set_extended(false);
    694   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
    695 }
    696 
    697 static void Test_NO_AUTO_CAPTURE() {
    698   RE_Options options;
    699   const char *str = "HELLO world";
    700   string captured;
    701 
    702   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
    703   if (VERBOSE_TEST)
    704     printf("parentheses capture text\n");
    705   RE re("(world|universe)$", options);
    706   CHECK(re.Extract("\\1", str , &captured));
    707   CHECK_EQ(captured, "world");
    708   options.set_no_auto_capture(true);
    709   printf("testing Option <NO_AUTO_CAPTURE>\n");
    710   if (VERBOSE_TEST)
    711     printf("parentheses do not capture text\n");
    712   re.Extract("\\1",str, &captured );
    713   CHECK_EQ(captured, "world");
    714 }
    715 
    716 static void Test_UNGREEDY() {
    717   RE_Options options;
    718   const char *str = "HELLO, 'this' is the 'world'";
    719 
    720   options.set_ungreedy(true);
    721   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
    722   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
    723   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
    724 
    725   options.set_ungreedy(false);
    726   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
    727   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
    728 }
    729 
    730 static void Test_all_options() {
    731   const char *str = "HELLO\n" "cruel\n" "world";
    732   RE_Options options;
    733   options.set_all_options(PCRE2_CASELESS | PCRE2_DOTALL);
    734 
    735   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
    736   options.set_all_options(0);
    737   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
    738   options.set_all_options(PCRE2_MULTILINE | PCRE2_EXTENDED);
    739 
    740   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
    741   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
    742                   " ^ c r u e l $ ",
    743                   str,
    744                   RE_Options(PCRE2_MULTILINE | PCRE2_EXTENDED),
    745                   false);
    746 
    747   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
    748                   " ^ c r u e l $ ",
    749                   str,
    750                   RE_Options()
    751                        .set_multiline(true)
    752                        .set_extended(true),
    753                   false);
    754 
    755   options.set_all_options(0);
    756   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
    757 
    758 }
    759 
    760 static void TestOptions() {
    761   printf("Testing Options\n");
    762   Test_CASELESS();
    763   Test_MULTILINE();
    764   Test_DOTALL();
    765   Test_DOLLAR_ENDONLY();
    766   Test_EXTENDED();
    767   Test_NO_AUTO_CAPTURE();
    768   Test_UNGREEDY();
    769   Test_all_options();
    770 }
    771 
    772 static void TestConstructors() {
    773   printf("Testing constructors\n");
    774 
    775   RE_Options options;
    776   options.set_dotall(true);
    777   const char *str = "HELLO\n" "cruel\n" "world";
    778 
    779   RE orig("HELLO.*world", options);
    780   CHECK(orig.FullMatch(str));
    781 
    782   RE copy1(orig);
    783   CHECK(copy1.FullMatch(str));
    784 
    785   RE copy2("not a match");
    786   CHECK(!copy2.FullMatch(str));
    787   copy2 = copy1;
    788   CHECK(copy2.FullMatch(str));
    789   copy2 = orig;
    790   CHECK(copy2.FullMatch(str));
    791 
    792   // Make sure when we assign to ourselves, nothing bad happens
    793   orig = orig;
    794   copy1 = copy1;
    795   copy2 = copy2;
    796   CHECK(orig.FullMatch(str));
    797   CHECK(copy1.FullMatch(str));
    798   CHECK(copy2.FullMatch(str));
    799 }
    800 
    801 int main(int argc, char** argv) {
    802   // Treat any flag as --help
    803   if (argc > 1 && argv[1][0] == '-') {
    804     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
    805            "       If 'timingX ###' is specified, run the given timing test\n"
    806            "       with the given number of iterations, rather than running\n"
    807            "       the default corectness test.\n", argv[0]);
    808     return 0;
    809   }
    810 
    811   if (argc > 1) {
    812     if ( argc == 2 || atoi(argv[2]) == 0) {
    813       printf("timing mode needs a num-iters argument\n");
    814       return 1;
    815     }
    816     if (!strcmp(argv[1], "timing1"))
    817       Timing1(atoi(argv[2]));
    818     else if (!strcmp(argv[1], "timing2"))
    819       Timing2(atoi(argv[2]));
    820     else if (!strcmp(argv[1], "timing3"))
    821       Timing3(atoi(argv[2]));
    822     else
    823       printf("Unknown argument '%s'\n", argv[1]);
    824     return 0;
    825   }
    826 
    827   printf("PCRE C++ wrapper tests\n");
    828   printf("Testing FullMatch\n");
    829 
    830   int i;
    831   string s;
    832 
    833   /***** FullMatch with no args *****/
    834 
    835   CHECK(RE("h.*o").FullMatch("hello"));
    836   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
    837   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
    838   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
    839   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
    840   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
    841 
    842   /***** FullMatch with args *****/
    843 
    844   // Zero-arg
    845   CHECK(RE("\\d+").FullMatch("1001"));
    846 
    847   // Single-arg
    848   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
    849   CHECK_EQ(i, 1001);
    850   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
    851   CHECK_EQ(i, -123);
    852   CHECK(!RE("()\\d+").FullMatch("10", &i));
    853   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
    854                                 &i));
    855 
    856   // Digits surrounding integer-arg
    857   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
    858   CHECK_EQ(i, 23);
    859   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
    860   CHECK_EQ(i, 1);
    861   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
    862   CHECK_EQ(i, -1);
    863   CHECK(RE("(\\d)").PartialMatch("1234", &i));
    864   CHECK_EQ(i, 1);
    865   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
    866   CHECK_EQ(i, -1);
    867 
    868   // String-arg
    869   CHECK(RE("h(.*)o").FullMatch("hello", &s));
    870   CHECK_EQ(s, string("ell"));
    871 
    872   // StringPiece-arg
    873   StringPiece sp;
    874   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
    875   CHECK_EQ(sp.size(), 4);
    876   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
    877   CHECK_EQ(i, 1234);
    878 
    879   // Multi-arg
    880   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
    881   CHECK_EQ(s, string("ruby"));
    882   CHECK_EQ(i, 1234);
    883 
    884   // Ignore non-void* NULL arg
    885   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
    886   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
    887   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
    888   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
    889 #ifdef HAVE_LONG_LONG
    890   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
    891 #endif
    892   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
    893   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
    894 
    895   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
    896   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
    897   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
    898   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
    899   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
    900   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
    901 
    902   // Ignored arg
    903   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
    904   CHECK_EQ(s, string("ruby"));
    905   CHECK_EQ(i, 1234);
    906 
    907   // Type tests
    908   {
    909     char c;
    910     CHECK(RE("(H)ello").FullMatch("Hello", &c));
    911     CHECK_EQ(c, 'H');
    912   }
    913   {
    914     unsigned char c;
    915     CHECK(RE("(H)ello").FullMatch("Hello", &c));
    916     CHECK_EQ(c, static_cast<unsigned char>('H'));
    917   }
    918   {
    919     short v;
    920     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
    921     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
    922     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
    923     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
    924     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
    925     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
    926   }
    927   {
    928     unsigned short v;
    929     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
    930     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
    931     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
    932     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
    933   }
    934   {
    935     int v;
    936     static const int max_value = 0x7fffffff;
    937     static const int min_value = -max_value - 1;
    938     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
    939     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
    940     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
    941     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
    942     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
    943     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
    944   }
    945   {
    946     unsigned int v;
    947     static const unsigned int max_value = 0xfffffffful;
    948     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
    949     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
    950     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
    951   }
    952 #ifdef HAVE_LONG_LONG
    953 # if defined(__MINGW__) || defined(__MINGW32__)
    954 #   define LLD "%I64d"
    955 #   define LLU "%I64u"
    956 # else
    957 #   define LLD "%lld"
    958 #   define LLU "%llu"
    959 # endif
    960   {
    961     long long v;
    962     static const long long max_value = 0x7fffffffffffffffLL;
    963     static const long long min_value = -max_value - 1;
    964     char buf[32];  // definitely big enough for a long long
    965 
    966     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
    967     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
    968 
    969     sprintf(buf, LLD, max_value);
    970     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
    971 
    972     sprintf(buf, LLD, min_value);
    973     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
    974 
    975     sprintf(buf, LLD, max_value);
    976     assert(buf[strlen(buf)-1] != '9');
    977     buf[strlen(buf)-1]++;
    978     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
    979 
    980     sprintf(buf, LLD, min_value);
    981     assert(buf[strlen(buf)-1] != '9');
    982     buf[strlen(buf)-1]++;
    983     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
    984   }
    985 #endif
    986 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
    987   {
    988     unsigned long long v;
    989     long long v2;
    990     static const unsigned long long max_value = 0xffffffffffffffffULL;
    991     char buf[32];  // definitely big enough for a unsigned long long
    992 
    993     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
    994     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
    995 
    996     sprintf(buf, LLU, max_value);
    997     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
    998 
    999     assert(buf[strlen(buf)-1] != '9');
   1000     buf[strlen(buf)-1]++;
   1001     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
   1002   }
   1003 #endif
   1004   {
   1005     float v;
   1006     CHECK(RE("(.*)").FullMatch("100", &v));
   1007     CHECK(RE("(.*)").FullMatch("-100.", &v));
   1008     CHECK(RE("(.*)").FullMatch("1e23", &v));
   1009   }
   1010   {
   1011     double v;
   1012     CHECK(RE("(.*)").FullMatch("100", &v));
   1013     CHECK(RE("(.*)").FullMatch("-100.", &v));
   1014     CHECK(RE("(.*)").FullMatch("1e23", &v));
   1015   }
   1016 
   1017   // Check that matching is fully anchored
   1018   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
   1019   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
   1020   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
   1021   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
   1022 
   1023   // Braces
   1024   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
   1025   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
   1026   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
   1027 
   1028   // Complicated RE
   1029   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
   1030   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
   1031   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
   1032   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
   1033 
   1034   // Check full-match handling (needs '$' tacked on internally)
   1035   CHECK(RE("fo|foo").FullMatch("fo"));
   1036   CHECK(RE("fo|foo").FullMatch("foo"));
   1037   CHECK(RE("fo|foo$").FullMatch("fo"));
   1038   CHECK(RE("fo|foo$").FullMatch("foo"));
   1039   CHECK(RE("foo$").FullMatch("foo"));
   1040   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
   1041   CHECK(!RE("fo|bar").FullMatch("fox"));
   1042 
   1043   // Uncomment the following if we change the handling of '$' to
   1044   // prevent it from matching a trailing newline
   1045   if (false) {
   1046     // Check that we don't get bitten by pcre's special handling of a
   1047     // '\n' at the end of the string matching '$'
   1048     CHECK(!RE("foo$").PartialMatch("foo\n"));
   1049   }
   1050 
   1051   // Number of args
   1052   int a[16];
   1053   CHECK(RE("").FullMatch(""));
   1054 
   1055   memset(a, 0, sizeof(0));
   1056   CHECK(RE("(\\d){1}").FullMatch("1",
   1057                                  &a[0]));
   1058   CHECK_EQ(a[0], 1);
   1059 
   1060   memset(a, 0, sizeof(0));
   1061   CHECK(RE("(\\d)(\\d)").FullMatch("12",
   1062                                    &a[0],  &a[1]));
   1063   CHECK_EQ(a[0], 1);
   1064   CHECK_EQ(a[1], 2);
   1065 
   1066   memset(a, 0, sizeof(0));
   1067   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
   1068                                         &a[0],  &a[1],  &a[2]));
   1069   CHECK_EQ(a[0], 1);
   1070   CHECK_EQ(a[1], 2);
   1071   CHECK_EQ(a[2], 3);
   1072 
   1073   memset(a, 0, sizeof(0));
   1074   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
   1075                                              &a[0],  &a[1],  &a[2],  &a[3]));
   1076   CHECK_EQ(a[0], 1);
   1077   CHECK_EQ(a[1], 2);
   1078   CHECK_EQ(a[2], 3);
   1079   CHECK_EQ(a[3], 4);
   1080 
   1081   memset(a, 0, sizeof(0));
   1082   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
   1083                                                   &a[0],  &a[1],  &a[2],
   1084                                                   &a[3],  &a[4]));
   1085   CHECK_EQ(a[0], 1);
   1086   CHECK_EQ(a[1], 2);
   1087   CHECK_EQ(a[2], 3);
   1088   CHECK_EQ(a[3], 4);
   1089   CHECK_EQ(a[4], 5);
   1090 
   1091   memset(a, 0, sizeof(0));
   1092   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
   1093                                                        &a[0],  &a[1],  &a[2],
   1094                                                        &a[3],  &a[4],  &a[5]));
   1095   CHECK_EQ(a[0], 1);
   1096   CHECK_EQ(a[1], 2);
   1097   CHECK_EQ(a[2], 3);
   1098   CHECK_EQ(a[3], 4);
   1099   CHECK_EQ(a[4], 5);
   1100   CHECK_EQ(a[5], 6);
   1101 
   1102   memset(a, 0, sizeof(0));
   1103   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
   1104                                                             &a[0],  &a[1],  &a[2],  &a[3],
   1105                                                             &a[4],  &a[5],  &a[6]));
   1106   CHECK_EQ(a[0], 1);
   1107   CHECK_EQ(a[1], 2);
   1108   CHECK_EQ(a[2], 3);
   1109   CHECK_EQ(a[3], 4);
   1110   CHECK_EQ(a[4], 5);
   1111   CHECK_EQ(a[5], 6);
   1112   CHECK_EQ(a[6], 7);
   1113 
   1114   memset(a, 0, sizeof(0));
   1115   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
   1116            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
   1117                "1234567890123456",
   1118                &a[0],  &a[1],  &a[2],  &a[3],
   1119                &a[4],  &a[5],  &a[6],  &a[7],
   1120                &a[8],  &a[9],  &a[10], &a[11],
   1121                &a[12], &a[13], &a[14], &a[15]));
   1122   CHECK_EQ(a[0], 1);
   1123   CHECK_EQ(a[1], 2);
   1124   CHECK_EQ(a[2], 3);
   1125   CHECK_EQ(a[3], 4);
   1126   CHECK_EQ(a[4], 5);
   1127   CHECK_EQ(a[5], 6);
   1128   CHECK_EQ(a[6], 7);
   1129   CHECK_EQ(a[7], 8);
   1130   CHECK_EQ(a[8], 9);
   1131   CHECK_EQ(a[9], 0);
   1132   CHECK_EQ(a[10], 1);
   1133   CHECK_EQ(a[11], 2);
   1134   CHECK_EQ(a[12], 3);
   1135   CHECK_EQ(a[13], 4);
   1136   CHECK_EQ(a[14], 5);
   1137   CHECK_EQ(a[15], 6);
   1138 
   1139   /***** PartialMatch *****/
   1140 
   1141   printf("Testing PartialMatch\n");
   1142 
   1143   CHECK(RE("h.*o").PartialMatch("hello"));
   1144   CHECK(RE("h.*o").PartialMatch("othello"));
   1145   CHECK(RE("h.*o").PartialMatch("hello!"));
   1146   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
   1147 
   1148   /***** other tests *****/
   1149 
   1150   RadixTests();
   1151   TestReplace();
   1152   TestExtract();
   1153   TestConsume();
   1154   TestFindAndConsume();
   1155   TestQuoteMetaAll();
   1156   TestMatchNumberPeculiarity();
   1157 
   1158   // Check the pattern() accessor
   1159   {
   1160     const string kPattern = "http://([^/]+)/.*";
   1161     const RE re(kPattern);
   1162     CHECK_EQ(kPattern, re.pattern());
   1163   }
   1164 
   1165   // Check RE error field.
   1166   {
   1167     RE re("foo");
   1168     CHECK(re.error().empty());  // Must have no error
   1169   }
   1170 
   1171 #ifdef SUPPORT_UTF8
   1172   // Check UTF-8 handling
   1173   {
   1174     printf("Testing UTF-8 handling\n");
   1175 
   1176     // Three Japanese characters (nihongo)
   1177     const unsigned char utf8_string[] = {
   1178          0xe6, 0x97, 0xa5, // 65e5
   1179          0xe6, 0x9c, 0xac, // 627c
   1180          0xe8, 0xaa, 0x9e, // 8a9e
   1181          0
   1182     };
   1183     const unsigned char utf8_pattern[] = {
   1184          '.',
   1185          0xe6, 0x9c, 0xac, // 627c
   1186          '.',
   1187          0
   1188     };
   1189 
   1190     // Both should match in either mode, bytes or UTF-8
   1191     RE re_test1(".........");
   1192     CHECK(re_test1.FullMatch(utf8_string));
   1193     RE re_test2("...", pcrecpp::UTF8());
   1194     CHECK(re_test2.FullMatch(utf8_string));
   1195 
   1196     // Check that '.' matches one byte or UTF-8 character
   1197     // according to the mode.
   1198     string ss;
   1199     RE re_test3("(.)");
   1200     CHECK(re_test3.PartialMatch(utf8_string, &ss));
   1201     CHECK_EQ(ss, string("\xe6"));
   1202     RE re_test4("(.)", pcrecpp::UTF8());
   1203     CHECK(re_test4.PartialMatch(utf8_string, &ss));
   1204     CHECK_EQ(ss, string("\xe6\x97\xa5"));
   1205 
   1206     // Check that string matches itself in either mode
   1207     RE re_test5(utf8_string);
   1208     CHECK(re_test5.FullMatch(utf8_string));
   1209     RE re_test6(utf8_string, pcrecpp::UTF8());
   1210     CHECK(re_test6.FullMatch(utf8_string));
   1211 
   1212     // Check that pattern matches string only in UTF8 mode
   1213     RE re_test7(utf8_pattern);
   1214     CHECK(!re_test7.FullMatch(utf8_string));
   1215     RE re_test8(utf8_pattern, pcrecpp::UTF8());
   1216     CHECK(re_test8.FullMatch(utf8_string));
   1217   }
   1218 
   1219   // Check that ungreedy, UTF8 regular expressions don't match when they
   1220   // oughtn't -- see bug 82246.
   1221   {
   1222     // This code always worked.
   1223     const char* pattern = "\\w+X";
   1224     const string target = "a aX";
   1225     RE match_sentence(pattern);
   1226     RE match_sentence_re(pattern, pcrecpp::UTF8());
   1227 
   1228     CHECK(!match_sentence.FullMatch(target));
   1229     CHECK(!match_sentence_re.FullMatch(target));
   1230   }
   1231 
   1232   {
   1233     const char* pattern = "(?U)\\w+X";
   1234     const string target = "a aX";
   1235     RE match_sentence(pattern);
   1236     RE match_sentence_re(pattern, pcrecpp::UTF8());
   1237 
   1238     CHECK(!match_sentence.FullMatch(target));
   1239     CHECK(!match_sentence_re.FullMatch(target));
   1240   }
   1241 #endif  /* def SUPPORT_UTF8 */
   1242 
   1243   printf("Testing error reporting\n");
   1244 
   1245   { RE re("a\\1"); CHECK(!re.error().empty()); }
   1246   {
   1247     RE re("a[x");
   1248     CHECK(!re.error().empty());
   1249   }
   1250   {
   1251     RE re("a[z-a]");
   1252     CHECK(!re.error().empty());
   1253   }
   1254   {
   1255     RE re("a[[:foobar:]]");
   1256     CHECK(!re.error().empty());
   1257   }
   1258   {
   1259     RE re("a(b");
   1260     CHECK(!re.error().empty());
   1261   }
   1262   {
   1263     RE re("a\\");
   1264     CHECK(!re.error().empty());
   1265   }
   1266 
   1267   // Test that recursion is stopped
   1268   TestRecursion();
   1269 
   1270   // Test Options
   1271   if (getenv("VERBOSE_TEST") != NULL)
   1272     VERBOSE_TEST  = true;
   1273   TestOptions();
   1274 
   1275   // Test the constructors
   1276   TestConstructors();
   1277 
   1278   // Done
   1279   printf("OK\n");
   1280 
   1281   return 0;
   1282 }
   1283