Home | History | Annotate | Download | only in dist
      1 // -*- coding: utf-8 -*-
      2 //
      3 // Copyright (c) 2005 - 2010, Google Inc.
      4 // All rights reserved.
      5 //
      6 // Redistribution and use in source and binary forms, with or without
      7 // modification, are permitted provided that the following conditions are
      8 // met:
      9 //
     10 //     * Redistributions of source code must retain the above copyright
     11 // notice, this list of conditions and the following disclaimer.
     12 //     * Redistributions in binary form must reproduce the above
     13 // copyright notice, this list of conditions and the following disclaimer
     14 // in the documentation and/or other materials provided with the
     15 // distribution.
     16 //     * Neither the name of Google Inc. nor the names of its
     17 // contributors may be used to endorse or promote products derived from
     18 // this software without specific prior written permission.
     19 //
     20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 //
     32 // Author: Sanjay Ghemawat
     33 //
     34 // TODO: Test extractions for PartialMatch/Consume
     35 
     36 #ifdef HAVE_CONFIG_H
     37 #include "config.h"
     38 #endif
     39 
#include <stdio.h>
#include <stdlib.h>      /* for exit and atoi */
#include <string.h>      /* for memset and strcmp */
#include <cassert>
#include <vector>
#include "pcrecpp.h"
     45 
     46 using pcrecpp::StringPiece;
     47 using pcrecpp::RE;
     48 using pcrecpp::RE_Options;
     49 using pcrecpp::Hex;
     50 using pcrecpp::Octal;
     51 using pcrecpp::CRadix;
     52 
     53 static bool VERBOSE_TEST  = false;
     54 
     55 // CHECK dies with a fatal error if condition is not true.  It is *not*
     56 // controlled by NDEBUG, so the check will be executed regardless of
     57 // compilation mode.  Therefore, it is safe to do things like:
     58 //    CHECK_EQ(fp->Write(x), 4)
     59 #define CHECK(condition) do {                           \
     60   if (!(condition)) {                                   \
     61     fprintf(stderr, "%s:%d: Check failed: %s\n",        \
     62             __FILE__, __LINE__, #condition);            \
     63     exit(1);                                            \
     64   }                                                     \
     65 } while (0)
     66 
     67 #define CHECK_EQ(a, b)   CHECK(a == b)
     68 
     69 static void Timing1(int num_iters) {
     70   // Same pattern lots of times
     71   RE pattern("ruby:\\d+");
     72   StringPiece p("ruby:1234");
     73   for (int j = num_iters; j > 0; j--) {
     74     CHECK(pattern.FullMatch(p));
     75   }
     76 }
     77 
     78 static void Timing2(int num_iters) {
     79   // Same pattern lots of times
     80   RE pattern("ruby:(\\d+)");
     81   int i;
     82   for (int j = num_iters; j > 0; j--) {
     83     CHECK(pattern.FullMatch("ruby:1234", &i));
     84     CHECK_EQ(i, 1234);
     85   }
     86 }
     87 
     88 static void Timing3(int num_iters) {
     89   string text_string;
     90   for (int j = num_iters; j > 0; j--) {
     91     text_string += "this is another line\n";
     92   }
     93 
     94   RE line_matcher(".*\n");
     95   string line;
     96   StringPiece text(text_string);
     97   int counter = 0;
     98   while (line_matcher.Consume(&text)) {
     99     counter++;
    100   }
    101   printf("Matched %d lines\n", counter);
    102 }
    103 
    104 #if 0  // uncomment this if you have a way of defining VirtualProcessSize()
    105 
    106 static void LeakTest() {
    107   // Check for memory leaks
    108   unsigned long long initial_size = 0;
    109   for (int i = 0; i < 100000; i++) {
    110     if (i == 50000) {
    111       initial_size = VirtualProcessSize();
    112       printf("Size after 50000: %llu\n", initial_size);
    113     }
    114     char buf[100];  // definitely big enough
    115     sprintf(buf, "pat%09d", i);
    116     RE newre(buf);
    117   }
    118   uint64 final_size = VirtualProcessSize();
    119   printf("Size after 100000: %llu\n", final_size);
    120   const double growth = double(final_size - initial_size) / final_size;
    121   printf("Growth: %0.2f%%", growth * 100);
    122   CHECK(growth < 0.02);       // Allow < 2% growth
    123 }
    124 
    125 #endif
    126 
    127 static void RadixTests() {
    128   printf("Testing hex\n");
    129 
    130 #define CHECK_HEX(type, value) \
    131   do { \
    132     type v; \
    133     CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
    134     CHECK_EQ(v, 0x ## value); \
    135     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
    136     CHECK_EQ(v, 0x ## value); \
    137   } while(0)
    138 
    139   CHECK_HEX(short,              2bad);
    140   CHECK_HEX(unsigned short,     2badU);
    141   CHECK_HEX(int,                dead);
    142   CHECK_HEX(unsigned int,       deadU);
    143   CHECK_HEX(long,               7eadbeefL);
    144   CHECK_HEX(unsigned long,      deadbeefUL);
    145 #ifdef HAVE_LONG_LONG
    146   CHECK_HEX(long long,          12345678deadbeefLL);
    147 #endif
    148 #ifdef HAVE_UNSIGNED_LONG_LONG
    149   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
    150 #endif
    151 
    152 #undef CHECK_HEX
    153 
    154   printf("Testing octal\n");
    155 
    156 #define CHECK_OCTAL(type, value) \
    157   do { \
    158     type v; \
    159     CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
    160     CHECK_EQ(v, 0 ## value); \
    161     CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
    162     CHECK_EQ(v, 0 ## value); \
    163   } while(0)
    164 
    165   CHECK_OCTAL(short,              77777);
    166   CHECK_OCTAL(unsigned short,     177777U);
    167   CHECK_OCTAL(int,                17777777777);
    168   CHECK_OCTAL(unsigned int,       37777777777U);
    169   CHECK_OCTAL(long,               17777777777L);
    170   CHECK_OCTAL(unsigned long,      37777777777UL);
    171 #ifdef HAVE_LONG_LONG
    172   CHECK_OCTAL(long long,          777777777777777777777LL);
    173 #endif
    174 #ifdef HAVE_UNSIGNED_LONG_LONG
    175   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
    176 #endif
    177 
    178 #undef CHECK_OCTAL
    179 
    180   printf("Testing decimal\n");
    181 
    182 #define CHECK_DECIMAL(type, value) \
    183   do { \
    184     type v; \
    185     CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
    186     CHECK_EQ(v, value); \
    187     CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
    188     CHECK_EQ(v, value); \
    189   } while(0)
    190 
    191   CHECK_DECIMAL(short,              -1);
    192   CHECK_DECIMAL(unsigned short,     9999);
    193   CHECK_DECIMAL(int,                -1000);
    194   CHECK_DECIMAL(unsigned int,       12345U);
    195   CHECK_DECIMAL(long,               -10000000L);
    196   CHECK_DECIMAL(unsigned long,      3083324652U);
    197 #ifdef HAVE_LONG_LONG
    198   CHECK_DECIMAL(long long,          -100000000000000LL);
    199 #endif
    200 #ifdef HAVE_UNSIGNED_LONG_LONG
    201   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
    202 #endif
    203 
    204 #undef CHECK_DECIMAL
    205 
    206 }
    207 
    208 static void TestReplace() {
    209   printf("Testing Replace\n");
    210 
    211   struct ReplaceTest {
    212     const char *regexp;
    213     const char *rewrite;
    214     const char *original;
    215     const char *single;
    216     const char *global;
    217     int global_count;         // the expected return value from ReplaceAll
    218   };
    219   static const ReplaceTest tests[] = {
    220     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
    221       "\\2\\1ay",
    222       "the quick brown fox jumps over the lazy dogs.",
    223       "ethay quick brown fox jumps over the lazy dogs.",
    224       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
    225       9 },
    226     { "\\w+",
    227       "\\0-NOSPAM",
    228       "paul.haahr (at) google.com",
    229       "paul-NOSPAM.haahr (at) google.com",
    230       "paul-NOSPAM.haahr-NOSPAM (at) google-NOSPAM.com-NOSPAM",
    231       4 },
    232     { "^",
    233       "(START)",
    234       "foo",
    235       "(START)foo",
    236       "(START)foo",
    237       1 },
    238     { "^",
    239       "(START)",
    240       "",
    241       "(START)",
    242       "(START)",
    243       1 },
    244     { "$",
    245       "(END)",
    246       "",
    247       "(END)",
    248       "(END)",
    249       1 },
    250     { "b",
    251       "bb",
    252       "ababababab",
    253       "abbabababab",
    254       "abbabbabbabbabb",
    255        5 },
    256     { "b",
    257       "bb",
    258       "bbbbbb",
    259       "bbbbbbb",
    260       "bbbbbbbbbbbb",
    261       6 },
    262     { "b+",
    263       "bb",
    264       "bbbbbb",
    265       "bb",
    266       "bb",
    267       1 },
    268     { "b*",
    269       "bb",
    270       "bbbbbb",
    271       "bb",
    272       "bbbb",
    273       2 },
    274     { "b*",
    275       "bb",
    276       "aaaaa",
    277       "bbaaaaa",
    278       "bbabbabbabbabbabb",
    279       6 },
    280     { "b*",
    281       "bb",
    282       "aa\naa\n",
    283       "bbaa\naa\n",
    284       "bbabbabb\nbbabbabb\nbb",
    285       7 },
    286     { "b*",
    287       "bb",
    288       "aa\raa\r",
    289       "bbaa\raa\r",
    290       "bbabbabb\rbbabbabb\rbb",
    291       7 },
    292     { "b*",
    293       "bb",
    294       "aa\r\naa\r\n",
    295       "bbaa\r\naa\r\n",
    296       "bbabbabb\r\nbbabbabb\r\nbb",
    297       7 },
    298     // Check empty-string matching (it's tricky!)
    299     { "aa|b*",
    300       "@",
    301       "aa",
    302       "@",
    303       "@@",
    304       2 },
    305     { "b*|aa",
    306       "@",
    307       "aa",
    308       "@aa",
    309       "@@@",
    310       3 },
    311 #ifdef SUPPORT_UTF8
    312     { "b*",
    313       "bb",
    314       "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",   // utf8
    315       "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
    316       "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
    317       5 },
    318     { "b*",
    319       "bb",
    320       "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",   // utf8
    321       "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
    322       ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
    323        "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
    324       9 },
    325 #endif
    326     { "", NULL, NULL, NULL, NULL, 0 }
    327   };
    328 
    329 #ifdef SUPPORT_UTF8
    330   const bool support_utf8 = true;
    331 #else
    332   const bool support_utf8 = false;
    333 #endif
    334 
    335   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
    336     RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
    337     assert(re.error().empty());
    338     string one(t->original);
    339     CHECK(re.Replace(t->rewrite, &one));
    340     CHECK_EQ(one, t->single);
    341     string all(t->original);
    342     const int replace_count = re.GlobalReplace(t->rewrite, &all);
    343     CHECK_EQ(all, t->global);
    344     CHECK_EQ(replace_count, t->global_count);
    345   }
    346 
    347   // One final test: test \r\n replacement when we're not in CRLF mode
    348   {
    349     RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
    350     assert(re.error().empty());
    351     string all("aa\r\naa\r\n");
    352     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
    353     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
    354   }
    355   {
    356     RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
    357     assert(re.error().empty());
    358     string all("aa\r\naa\r\n");
    359     CHECK_EQ(re.GlobalReplace("bb", &all), 9);
    360     CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
    361   }
    362   // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
    363   //       Alas, the answer depends on how pcre was compiled.
    364 }
    365 
    366 static void TestExtract() {
    367   printf("Testing Extract\n");
    368 
    369   string s;
    370 
    371   CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris (at) kremvax.ru", &s));
    372   CHECK_EQ(s, "kremvax!boris");
    373 
    374   // check the RE interface as well
    375   CHECK(RE(".*").Extract("'\\0'", "foo", &s));
    376   CHECK_EQ(s, "'foo'");
    377   CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
    378   CHECK_EQ(s, "'foo'");
    379 }
    380 
    381 static void TestConsume() {
    382   printf("Testing Consume\n");
    383 
    384   string word;
    385 
    386   string s("   aaa b!@#$@#$cccc");
    387   StringPiece input(s);
    388 
    389   RE r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
    390   CHECK(r.Consume(&input, &word));
    391   CHECK_EQ(word, "aaa");
    392   CHECK(r.Consume(&input, &word));
    393   CHECK_EQ(word, "b");
    394   CHECK(! r.Consume(&input, &word));
    395 }
    396 
    397 static void TestFindAndConsume() {
    398   printf("Testing FindAndConsume\n");
    399 
    400   string word;
    401 
    402   string s("   aaa b!@#$@#$cccc");
    403   StringPiece input(s);
    404 
    405   RE r("(\\w+)");      // matches a word
    406   CHECK(r.FindAndConsume(&input, &word));
    407   CHECK_EQ(word, "aaa");
    408   CHECK(r.FindAndConsume(&input, &word));
    409   CHECK_EQ(word, "b");
    410   CHECK(r.FindAndConsume(&input, &word));
    411   CHECK_EQ(word, "cccc");
    412   CHECK(! r.FindAndConsume(&input, &word));
    413 }
    414 
    415 static void TestMatchNumberPeculiarity() {
    416   printf("Testing match-number peculiarity\n");
    417 
    418   string word1;
    419   string word2;
    420   string word3;
    421 
    422   RE r("(foo)|(bar)|(baz)");
    423   CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
    424   CHECK_EQ(word1, "foo");
    425   CHECK_EQ(word2, "");
    426   CHECK_EQ(word3, "");
    427   CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
    428   CHECK_EQ(word1, "");
    429   CHECK_EQ(word2, "bar");
    430   CHECK_EQ(word3, "");
    431   CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
    432   CHECK_EQ(word1, "");
    433   CHECK_EQ(word2, "");
    434   CHECK_EQ(word3, "baz");
    435   CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
    436 
    437   string a;
    438   CHECK(RE("(foo)|hello").FullMatch("hello", &a));
    439   CHECK_EQ(a, "");
    440 }
    441 
    442 static void TestRecursion() {
    443   printf("Testing recursion\n");
    444 
    445   // Get one string that passes (sometimes), one that never does.
    446   string text_good("abcdefghijk");
    447   string text_bad("acdefghijkl");
    448 
    449   // According to pcretest, matching text_good against (\w+)*b
    450   // requires match_limit of at least 8192, and match_recursion_limit
    451   // of at least 37.
    452 
    453   RE_Options options_ml;
    454   options_ml.set_match_limit(8192);
    455   RE re("(\\w+)*b", options_ml);
    456   CHECK(re.PartialMatch(text_good) == true);
    457   CHECK(re.PartialMatch(text_bad) == false);
    458   CHECK(re.FullMatch(text_good) == false);
    459   CHECK(re.FullMatch(text_bad) == false);
    460 
    461   options_ml.set_match_limit(1024);
    462   RE re2("(\\w+)*b", options_ml);
    463   CHECK(re2.PartialMatch(text_good) == false);   // because of match_limit
    464   CHECK(re2.PartialMatch(text_bad) == false);
    465   CHECK(re2.FullMatch(text_good) == false);
    466   CHECK(re2.FullMatch(text_bad) == false);
    467 
    468   RE_Options options_mlr;
    469   options_mlr.set_match_limit_recursion(50);
    470   RE re3("(\\w+)*b", options_mlr);
    471   CHECK(re3.PartialMatch(text_good) == true);
    472   CHECK(re3.PartialMatch(text_bad) == false);
    473   CHECK(re3.FullMatch(text_good) == false);
    474   CHECK(re3.FullMatch(text_bad) == false);
    475 
    476   options_mlr.set_match_limit_recursion(10);
    477   RE re4("(\\w+)*b", options_mlr);
    478   CHECK(re4.PartialMatch(text_good) == false);
    479   CHECK(re4.PartialMatch(text_bad) == false);
    480   CHECK(re4.FullMatch(text_good) == false);
    481   CHECK(re4.FullMatch(text_bad) == false);
    482 }
    483 
    484 // A meta-quoted string, interpreted as a pattern, should always match
    485 // the original unquoted string.
    486 static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
    487   string quoted = RE::QuoteMeta(unquoted);
    488   RE re(quoted, options);
    489   CHECK(re.FullMatch(unquoted));
    490 }
    491 
    492 // A string containing meaningful regexp characters, which is then meta-
    493 // quoted, should not generally match a string the unquoted string does.
    494 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
    495                                   RE_Options options = RE_Options()) {
    496   string quoted = RE::QuoteMeta(unquoted);
    497   RE re(quoted, options);
    498   CHECK(!re.FullMatch(should_not_match));
    499 }
    500 
    501 // Tests that quoted meta characters match their original strings,
    502 // and that a few things that shouldn't match indeed do not.
    503 static void TestQuotaMetaSimple() {
    504   TestQuoteMeta("foo");
    505   TestQuoteMeta("foo.bar");
    506   TestQuoteMeta("foo\\.bar");
    507   TestQuoteMeta("[1-9]");
    508   TestQuoteMeta("1.5-2.0?");
    509   TestQuoteMeta("\\d");
    510   TestQuoteMeta("Who doesn't like ice cream?");
    511   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
    512   TestQuoteMeta("((?!)xxx).*yyy");
    513   TestQuoteMeta("([");
    514   TestQuoteMeta(string("foo\0bar", 7));
    515 }
    516 
    517 static void TestQuoteMetaSimpleNegative() {
    518   NegativeTestQuoteMeta("foo", "bar");
    519   NegativeTestQuoteMeta("...", "bar");
    520   NegativeTestQuoteMeta("\\.", ".");
    521   NegativeTestQuoteMeta("\\.", "..");
    522   NegativeTestQuoteMeta("(a)", "a");
    523   NegativeTestQuoteMeta("(a|b)", "a");
    524   NegativeTestQuoteMeta("(a|b)", "(a)");
    525   NegativeTestQuoteMeta("(a|b)", "a|b");
    526   NegativeTestQuoteMeta("[0-9]", "0");
    527   NegativeTestQuoteMeta("[0-9]", "0-9");
    528   NegativeTestQuoteMeta("[0-9]", "[9]");
    529   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
    530 }
    531 
    532 static void TestQuoteMetaLatin1() {
    533   TestQuoteMeta("3\xb2 = 9");
    534 }
    535 
    536 static void TestQuoteMetaUtf8() {
    537 #ifdef SUPPORT_UTF8
    538   TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", pcrecpp::UTF8());
    539   TestQuoteMeta("xyz", pcrecpp::UTF8());            // No fancy utf8
    540   TestQuoteMeta("\xc2\xb0", pcrecpp::UTF8());       // 2-byte utf8 (degree symbol)
    541   TestQuoteMeta("27\xc2\xb0 degrees", pcrecpp::UTF8());  // As a middle character
    542   TestQuoteMeta("\xe2\x80\xb3", pcrecpp::UTF8());   // 3-byte utf8 (double prime)
    543   TestQuoteMeta("\xf0\x9d\x85\x9f", pcrecpp::UTF8()); // 4-byte utf8 (music note)
    544   TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, but should still work
    545   NegativeTestQuoteMeta("27\xc2\xb0",               // 2-byte utf (degree symbol)
    546                         "27\\\xc2\\\xb0",
    547                         pcrecpp::UTF8());
    548 #endif
    549 }
    550 
    551 static void TestQuoteMetaAll() {
    552   printf("Testing QuoteMeta\n");
    553   TestQuotaMetaSimple();
    554   TestQuoteMetaSimpleNegative();
    555   TestQuoteMetaLatin1();
    556   TestQuoteMetaUtf8();
    557 }
    558 
    559 //
    560 // Options tests contributed by
    561 // Giuseppe Maxia, CTO, Stardata s.r.l.
    562 // July 2005
    563 //
    564 static void GetOneOptionResult(
    565                 const char *option_name,
    566                 const char *regex,
    567                 const char *str,
    568                 RE_Options options,
    569                 bool full,
    570                 string expected) {
    571 
    572   printf("Testing Option <%s>\n", option_name);
    573   if(VERBOSE_TEST)
    574     printf("/%s/ finds \"%s\" within \"%s\" \n",
    575                     regex,
    576                     expected.c_str(),
    577                     str);
    578   string captured("");
    579   if (full)
    580     RE(regex,options).FullMatch(str, &captured);
    581   else
    582     RE(regex,options).PartialMatch(str, &captured);
    583   CHECK_EQ(captured, expected);
    584 }
    585 
    586 static void TestOneOption(
    587                 const char *option_name,
    588                 const char *regex,
    589                 const char *str,
    590                 RE_Options options,
    591                 bool full,
    592                 bool assertive = true) {
    593 
    594   printf("Testing Option <%s>\n", option_name);
    595   if (VERBOSE_TEST)
    596     printf("'%s' %s /%s/ \n",
    597                   str,
    598                   (assertive? "matches" : "doesn't match"),
    599                   regex);
    600   if (assertive) {
    601     if (full)
    602       CHECK(RE(regex,options).FullMatch(str));
    603     else
    604       CHECK(RE(regex,options).PartialMatch(str));
    605   } else {
    606     if (full)
    607       CHECK(!RE(regex,options).FullMatch(str));
    608     else
    609       CHECK(!RE(regex,options).PartialMatch(str));
    610   }
    611 }
    612 
    613 static void Test_CASELESS() {
    614   RE_Options options;
    615   RE_Options options2;
    616 
    617   options.set_caseless(true);
    618   TestOneOption("CASELESS (class)",  "HELLO",    "hello", options, false);
    619   TestOneOption("CASELESS (class2)", "HELLO",    "hello", options2.set_caseless(true), false);
    620   TestOneOption("CASELESS (class)",  "^[A-Z]+$", "Hello", options, false);
    621 
    622   TestOneOption("CASELESS (function)", "HELLO",    "hello", pcrecpp::CASELESS(), false);
    623   TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
    624   options.set_caseless(false);
    625   TestOneOption("no CASELESS", "HELLO",    "hello", options, false, false);
    626 }
    627 
    628 static void Test_MULTILINE() {
    629   RE_Options options;
    630   RE_Options options2;
    631   const char *str = "HELLO\n" "cruel\n" "world\n";
    632 
    633   options.set_multiline(true);
    634   TestOneOption("MULTILINE (class)",    "^cruel$", str, options, false);
    635   TestOneOption("MULTILINE (class2)",   "^cruel$", str, options2.set_multiline(true), false);
    636   TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
    637   options.set_multiline(false);
    638   TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
    639 }
    640 
    641 static void Test_DOTALL() {
    642   RE_Options options;
    643   RE_Options options2;
    644   const char *str = "HELLO\n" "cruel\n" "world";
    645 
    646   options.set_dotall(true);
    647   TestOneOption("DOTALL (class)",    "HELLO.*world", str, options, true);
    648   TestOneOption("DOTALL (class2)",   "HELLO.*world", str, options2.set_dotall(true), true);
    649   TestOneOption("DOTALL (function)",    "HELLO.*world", str, pcrecpp::DOTALL(), true);
    650   options.set_dotall(false);
    651   TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
    652 }
    653 
    654 static void Test_DOLLAR_ENDONLY() {
    655   RE_Options options;
    656   RE_Options options2;
    657   const char *str = "HELLO world\n";
    658 
    659   TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
    660   options.set_dollar_endonly(true);
    661   TestOneOption("DOLLAR_ENDONLY 1",    "world$", str, options, false, false);
    662   TestOneOption("DOLLAR_ENDONLY 2",    "world$", str, options2.set_dollar_endonly(true), false, false);
    663 }
    664 
    665 static void Test_EXTRA() {
    666   RE_Options options;
    667   const char *str = "HELLO";
    668 
    669   options.set_extra(true);
    670   TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
    671   TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
    672   options.set_extra(false);
    673   TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
    674 }
    675 
    676 static void Test_EXTENDED() {
    677   RE_Options options;
    678   RE_Options options2;
    679   const char *str = "HELLO world";
    680 
    681   options.set_extended(true);
    682   TestOneOption("EXTENDED (class)",    "HELLO world", str, options, false, false);
    683   TestOneOption("EXTENDED (class2)",   "HELLO world", str, options2.set_extended(true), false, false);
    684   TestOneOption("EXTENDED (class)",
    685                     "^ HE L{2} O "
    686                     "\\s+        "
    687                     "\\w+ $      ",
    688                     str,
    689                     options,
    690                     false);
    691 
    692   TestOneOption("EXTENDED (function)",    "HELLO world", str, pcrecpp::EXTENDED(), false, false);
    693   TestOneOption("EXTENDED (function)",
    694                     "^ HE L{2} O "
    695                     "\\s+        "
    696                     "\\w+ $      ",
    697                     str,
    698                     pcrecpp::EXTENDED(),
    699                     false);
    700 
    701   options.set_extended(false);
    702   TestOneOption("no EXTENDED", "HELLO world", str, options, false);
    703 }
    704 
    705 static void Test_NO_AUTO_CAPTURE() {
    706   RE_Options options;
    707   const char *str = "HELLO world";
    708   string captured;
    709 
    710   printf("Testing Option <no NO_AUTO_CAPTURE>\n");
    711   if (VERBOSE_TEST)
    712     printf("parentheses capture text\n");
    713   RE re("(world|universe)$", options);
    714   CHECK(re.Extract("\\1", str , &captured));
    715   CHECK_EQ(captured, "world");
    716   options.set_no_auto_capture(true);
    717   printf("testing Option <NO_AUTO_CAPTURE>\n");
    718   if (VERBOSE_TEST)
    719     printf("parentheses do not capture text\n");
    720   re.Extract("\\1",str, &captured );
    721   CHECK_EQ(captured, "world");
    722 }
    723 
    724 static void Test_UNGREEDY() {
    725   RE_Options options;
    726   const char *str = "HELLO, 'this' is the 'world'";
    727 
    728   options.set_ungreedy(true);
    729   GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
    730   GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
    731   GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
    732 
    733   options.set_ungreedy(false);
    734   GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
    735   GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
    736 }
    737 
    738 static void Test_all_options() {
    739   const char *str = "HELLO\n" "cruel\n" "world";
    740   RE_Options options;
    741   options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
    742 
    743   TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
    744   options.set_all_options(0);
    745   TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
    746   options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
    747 
    748   TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
    749   TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
    750                   " ^ c r u e l $ ",
    751                   str,
    752                   RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
    753                   false);
    754 
    755   TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
    756                   " ^ c r u e l $ ",
    757                   str,
    758                   RE_Options()
    759                        .set_multiline(true)
    760                        .set_extended(true),
    761                   false);
    762 
    763   options.set_all_options(0);
    764   TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
    765 
    766 }
    767 
    768 static void TestOptions() {
    769   printf("Testing Options\n");
    770   Test_CASELESS();
    771   Test_MULTILINE();
    772   Test_DOTALL();
    773   Test_DOLLAR_ENDONLY();
    774   Test_EXTENDED();
    775   Test_NO_AUTO_CAPTURE();
    776   Test_UNGREEDY();
    777   Test_EXTRA();
    778   Test_all_options();
    779 }
    780 
    781 static void TestConstructors() {
    782   printf("Testing constructors\n");
    783 
    784   RE_Options options;
    785   options.set_dotall(true);
    786   const char *str = "HELLO\n" "cruel\n" "world";
    787 
    788   RE orig("HELLO.*world", options);
    789   CHECK(orig.FullMatch(str));
    790 
    791   RE copy1(orig);
    792   CHECK(copy1.FullMatch(str));
    793 
    794   RE copy2("not a match");
    795   CHECK(!copy2.FullMatch(str));
    796   copy2 = copy1;
    797   CHECK(copy2.FullMatch(str));
    798   copy2 = orig;
    799   CHECK(copy2.FullMatch(str));
    800 
    801   // Make sure when we assign to ourselves, nothing bad happens
    802   orig = orig;
    803   copy1 = copy1;
    804   copy2 = copy2;
    805   CHECK(orig.FullMatch(str));
    806   CHECK(copy1.FullMatch(str));
    807   CHECK(copy2.FullMatch(str));
    808 }
    809 
    810 int main(int argc, char** argv) {
    811   // Treat any flag as --help
    812   if (argc > 1 && argv[1][0] == '-') {
    813     printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
    814            "       If 'timingX ###' is specified, run the given timing test\n"
    815            "       with the given number of iterations, rather than running\n"
    816            "       the default corectness test.\n", argv[0]);
    817     return 0;
    818   }
    819 
    820   if (argc > 1) {
    821     if ( argc == 2 || atoi(argv[2]) == 0) {
    822       printf("timing mode needs a num-iters argument\n");
    823       return 1;
    824     }
    825     if (!strcmp(argv[1], "timing1"))
    826       Timing1(atoi(argv[2]));
    827     else if (!strcmp(argv[1], "timing2"))
    828       Timing2(atoi(argv[2]));
    829     else if (!strcmp(argv[1], "timing3"))
    830       Timing3(atoi(argv[2]));
    831     else
    832       printf("Unknown argument '%s'\n", argv[1]);
    833     return 0;
    834   }
    835 
    836   printf("PCRE C++ wrapper tests\n");
    837   printf("Testing FullMatch\n");
    838 
    839   int i;
    840   string s;
    841 
    842   /***** FullMatch with no args *****/
    843 
    844   CHECK(RE("h.*o").FullMatch("hello"));
    845   CHECK(!RE("h.*o").FullMatch("othello"));     // Must be anchored at front
    846   CHECK(!RE("h.*o").FullMatch("hello!"));      // Must be anchored at end
    847   CHECK(RE("a*").FullMatch("aaaa"));           // Fullmatch with normal op
    848   CHECK(RE("a*?").FullMatch("aaaa"));          // Fullmatch with nongreedy op
    849   CHECK(RE("a*?\\z").FullMatch("aaaa"));       // Two unusual ops
    850 
    851   /***** FullMatch with args *****/
    852 
    853   // Zero-arg
    854   CHECK(RE("\\d+").FullMatch("1001"));
    855 
    856   // Single-arg
    857   CHECK(RE("(\\d+)").FullMatch("1001",   &i));
    858   CHECK_EQ(i, 1001);
    859   CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
    860   CHECK_EQ(i, -123);
    861   CHECK(!RE("()\\d+").FullMatch("10", &i));
    862   CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
    863                                 &i));
    864 
    865   // Digits surrounding integer-arg
    866   CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
    867   CHECK_EQ(i, 23);
    868   CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
    869   CHECK_EQ(i, 1);
    870   CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
    871   CHECK_EQ(i, -1);
    872   CHECK(RE("(\\d)").PartialMatch("1234", &i));
    873   CHECK_EQ(i, 1);
    874   CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
    875   CHECK_EQ(i, -1);
    876 
    877   // String-arg
    878   CHECK(RE("h(.*)o").FullMatch("hello", &s));
    879   CHECK_EQ(s, string("ell"));
    880 
    881   // StringPiece-arg
    882   StringPiece sp;
    883   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
    884   CHECK_EQ(sp.size(), 4);
    885   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
    886   CHECK_EQ(i, 1234);
    887 
    888   // Multi-arg
    889   CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
    890   CHECK_EQ(s, string("ruby"));
    891   CHECK_EQ(i, 1234);
    892 
    893   // Ignore non-void* NULL arg
    894   CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
    895   CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
    896   CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
    897   CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
    898 #ifdef HAVE_LONG_LONG
    899   CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
    900 #endif
    901   CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
    902   CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
    903 
    904   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
    905   CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
    906   CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
    907   CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
    908   CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
    909   CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
    910 
    911   // Ignored arg
    912   CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
    913   CHECK_EQ(s, string("ruby"));
    914   CHECK_EQ(i, 1234);
    915 
    916   // Type tests
    917   {
    918     char c;
    919     CHECK(RE("(H)ello").FullMatch("Hello", &c));
    920     CHECK_EQ(c, 'H');
    921   }
    922   {
    923     unsigned char c;
    924     CHECK(RE("(H)ello").FullMatch("Hello", &c));
    925     CHECK_EQ(c, static_cast<unsigned char>('H'));
    926   }
    927   {
    928     short v;
    929     CHECK(RE("(-?\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
    930     CHECK(RE("(-?\\d+)").FullMatch("-100",    &v));    CHECK_EQ(v, -100);
    931     CHECK(RE("(-?\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
    932     CHECK(RE("(-?\\d+)").FullMatch("-32768",  &v));    CHECK_EQ(v, -32768);
    933     CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
    934     CHECK(!RE("(-?\\d+)").FullMatch("32768",  &v));
    935   }
    936   {
    937     unsigned short v;
    938     CHECK(RE("(\\d+)").FullMatch("100",     &v));    CHECK_EQ(v, 100);
    939     CHECK(RE("(\\d+)").FullMatch("32767",   &v));    CHECK_EQ(v, 32767);
    940     CHECK(RE("(\\d+)").FullMatch("65535",   &v));    CHECK_EQ(v, 65535);
    941     CHECK(!RE("(\\d+)").FullMatch("65536",  &v));
    942   }
    943   {
    944     int v;
    945     static const int max_value = 0x7fffffff;
    946     static const int min_value = -max_value - 1;
    947     CHECK(RE("(-?\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
    948     CHECK(RE("(-?\\d+)").FullMatch("-100",        &v)); CHECK_EQ(v, -100);
    949     CHECK(RE("(-?\\d+)").FullMatch("2147483647",  &v)); CHECK_EQ(v, max_value);
    950     CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
    951     CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
    952     CHECK(!RE("(-?\\d+)").FullMatch("2147483648",  &v));
    953   }
    954   {
    955     unsigned int v;
    956     static const unsigned int max_value = 0xfffffffful;
    957     CHECK(RE("(\\d+)").FullMatch("100",         &v)); CHECK_EQ(v, 100);
    958     CHECK(RE("(\\d+)").FullMatch("4294967295",  &v)); CHECK_EQ(v, max_value);
    959     CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
    960   }
    961 #ifdef HAVE_LONG_LONG
    962 # if defined(__MINGW__) || defined(__MINGW32__)
    963 #   define LLD "%I64d"
    964 #   define LLU "%I64u"
    965 # else
    966 #   define LLD "%lld"
    967 #   define LLU "%llu"
    968 # endif
    969   {
    970     long long v;
    971     static const long long max_value = 0x7fffffffffffffffLL;
    972     static const long long min_value = -max_value - 1;
    973     char buf[32];  // definitely big enough for a long long
    974 
    975     CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
    976     CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
    977 
    978     sprintf(buf, LLD, max_value);
    979     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
    980 
    981     sprintf(buf, LLD, min_value);
    982     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
    983 
    984     sprintf(buf, LLD, max_value);
    985     assert(buf[strlen(buf)-1] != '9');
    986     buf[strlen(buf)-1]++;
    987     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
    988 
    989     sprintf(buf, LLD, min_value);
    990     assert(buf[strlen(buf)-1] != '9');
    991     buf[strlen(buf)-1]++;
    992     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
    993   }
    994 #endif
    995 #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
    996   {
    997     unsigned long long v;
    998     long long v2;
    999     static const unsigned long long max_value = 0xffffffffffffffffULL;
   1000     char buf[32];  // definitely big enough for a unsigned long long
   1001 
   1002     CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
   1003     CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
   1004 
   1005     sprintf(buf, LLU, max_value);
   1006     CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
   1007 
   1008     assert(buf[strlen(buf)-1] != '9');
   1009     buf[strlen(buf)-1]++;
   1010     CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
   1011   }
   1012 #endif
   1013   {
   1014     float v;
   1015     CHECK(RE("(.*)").FullMatch("100", &v));
   1016     CHECK(RE("(.*)").FullMatch("-100.", &v));
   1017     CHECK(RE("(.*)").FullMatch("1e23", &v));
   1018   }
   1019   {
   1020     double v;
   1021     CHECK(RE("(.*)").FullMatch("100", &v));
   1022     CHECK(RE("(.*)").FullMatch("-100.", &v));
   1023     CHECK(RE("(.*)").FullMatch("1e23", &v));
   1024   }
   1025 
   1026   // Check that matching is fully anchored
   1027   CHECK(!RE("(\\d+)").FullMatch("x1001",  &i));
   1028   CHECK(!RE("(\\d+)").FullMatch("1001x",  &i));
   1029   CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
   1030   CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
   1031 
   1032   // Braces
   1033   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
   1034   CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
   1035   CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
   1036 
   1037   // Complicated RE
   1038   CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
   1039   CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
   1040   CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
   1041   CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
   1042 
   1043   // Check full-match handling (needs '$' tacked on internally)
   1044   CHECK(RE("fo|foo").FullMatch("fo"));
   1045   CHECK(RE("fo|foo").FullMatch("foo"));
   1046   CHECK(RE("fo|foo$").FullMatch("fo"));
   1047   CHECK(RE("fo|foo$").FullMatch("foo"));
   1048   CHECK(RE("foo$").FullMatch("foo"));
   1049   CHECK(!RE("foo\\$").FullMatch("foo$bar"));
   1050   CHECK(!RE("fo|bar").FullMatch("fox"));
   1051 
   1052   // Uncomment the following if we change the handling of '$' to
   1053   // prevent it from matching a trailing newline
   1054   if (false) {
   1055     // Check that we don't get bitten by pcre's special handling of a
   1056     // '\n' at the end of the string matching '$'
   1057     CHECK(!RE("foo$").PartialMatch("foo\n"));
   1058   }
   1059 
   1060   // Number of args
   1061   int a[16];
   1062   CHECK(RE("").FullMatch(""));
   1063 
   1064   memset(a, 0, sizeof(0));
   1065   CHECK(RE("(\\d){1}").FullMatch("1",
   1066                                  &a[0]));
   1067   CHECK_EQ(a[0], 1);
   1068 
   1069   memset(a, 0, sizeof(0));
   1070   CHECK(RE("(\\d)(\\d)").FullMatch("12",
   1071                                    &a[0],  &a[1]));
   1072   CHECK_EQ(a[0], 1);
   1073   CHECK_EQ(a[1], 2);
   1074 
   1075   memset(a, 0, sizeof(0));
   1076   CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
   1077                                         &a[0],  &a[1],  &a[2]));
   1078   CHECK_EQ(a[0], 1);
   1079   CHECK_EQ(a[1], 2);
   1080   CHECK_EQ(a[2], 3);
   1081 
   1082   memset(a, 0, sizeof(0));
   1083   CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
   1084                                              &a[0],  &a[1],  &a[2],  &a[3]));
   1085   CHECK_EQ(a[0], 1);
   1086   CHECK_EQ(a[1], 2);
   1087   CHECK_EQ(a[2], 3);
   1088   CHECK_EQ(a[3], 4);
   1089 
   1090   memset(a, 0, sizeof(0));
   1091   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
   1092                                                   &a[0],  &a[1],  &a[2],
   1093                                                   &a[3],  &a[4]));
   1094   CHECK_EQ(a[0], 1);
   1095   CHECK_EQ(a[1], 2);
   1096   CHECK_EQ(a[2], 3);
   1097   CHECK_EQ(a[3], 4);
   1098   CHECK_EQ(a[4], 5);
   1099 
   1100   memset(a, 0, sizeof(0));
   1101   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
   1102                                                        &a[0],  &a[1],  &a[2],
   1103                                                        &a[3],  &a[4],  &a[5]));
   1104   CHECK_EQ(a[0], 1);
   1105   CHECK_EQ(a[1], 2);
   1106   CHECK_EQ(a[2], 3);
   1107   CHECK_EQ(a[3], 4);
   1108   CHECK_EQ(a[4], 5);
   1109   CHECK_EQ(a[5], 6);
   1110 
   1111   memset(a, 0, sizeof(0));
   1112   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
   1113                                                             &a[0],  &a[1],  &a[2],  &a[3],
   1114                                                             &a[4],  &a[5],  &a[6]));
   1115   CHECK_EQ(a[0], 1);
   1116   CHECK_EQ(a[1], 2);
   1117   CHECK_EQ(a[2], 3);
   1118   CHECK_EQ(a[3], 4);
   1119   CHECK_EQ(a[4], 5);
   1120   CHECK_EQ(a[5], 6);
   1121   CHECK_EQ(a[6], 7);
   1122 
   1123   memset(a, 0, sizeof(0));
   1124   CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
   1125            "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
   1126                "1234567890123456",
   1127                &a[0],  &a[1],  &a[2],  &a[3],
   1128                &a[4],  &a[5],  &a[6],  &a[7],
   1129                &a[8],  &a[9],  &a[10], &a[11],
   1130                &a[12], &a[13], &a[14], &a[15]));
   1131   CHECK_EQ(a[0], 1);
   1132   CHECK_EQ(a[1], 2);
   1133   CHECK_EQ(a[2], 3);
   1134   CHECK_EQ(a[3], 4);
   1135   CHECK_EQ(a[4], 5);
   1136   CHECK_EQ(a[5], 6);
   1137   CHECK_EQ(a[6], 7);
   1138   CHECK_EQ(a[7], 8);
   1139   CHECK_EQ(a[8], 9);
   1140   CHECK_EQ(a[9], 0);
   1141   CHECK_EQ(a[10], 1);
   1142   CHECK_EQ(a[11], 2);
   1143   CHECK_EQ(a[12], 3);
   1144   CHECK_EQ(a[13], 4);
   1145   CHECK_EQ(a[14], 5);
   1146   CHECK_EQ(a[15], 6);
   1147 
   1148   /***** PartialMatch *****/
   1149 
   1150   printf("Testing PartialMatch\n");
   1151 
   1152   CHECK(RE("h.*o").PartialMatch("hello"));
   1153   CHECK(RE("h.*o").PartialMatch("othello"));
   1154   CHECK(RE("h.*o").PartialMatch("hello!"));
   1155   CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
   1156 
   1157   /***** other tests *****/
   1158 
   1159   RadixTests();
   1160   TestReplace();
   1161   TestExtract();
   1162   TestConsume();
   1163   TestFindAndConsume();
   1164   TestQuoteMetaAll();
   1165   TestMatchNumberPeculiarity();
   1166 
   1167   // Check the pattern() accessor
   1168   {
   1169     const string kPattern = "http://([^/]+)/.*";
   1170     const RE re(kPattern);
   1171     CHECK_EQ(kPattern, re.pattern());
   1172   }
   1173 
   1174   // Check RE error field.
   1175   {
   1176     RE re("foo");
   1177     CHECK(re.error().empty());  // Must have no error
   1178   }
   1179 
   1180 #ifdef SUPPORT_UTF8
   1181   // Check UTF-8 handling
   1182   {
   1183     printf("Testing UTF-8 handling\n");
   1184 
   1185     // Three Japanese characters (nihongo)
   1186     const unsigned char utf8_string[] = {
   1187          0xe6, 0x97, 0xa5, // 65e5
   1188          0xe6, 0x9c, 0xac, // 627c
   1189          0xe8, 0xaa, 0x9e, // 8a9e
   1190          0
   1191     };
   1192     const unsigned char utf8_pattern[] = {
   1193          '.',
   1194          0xe6, 0x9c, 0xac, // 627c
   1195          '.',
   1196          0
   1197     };
   1198 
   1199     // Both should match in either mode, bytes or UTF-8
   1200     RE re_test1(".........");
   1201     CHECK(re_test1.FullMatch(utf8_string));
   1202     RE re_test2("...", pcrecpp::UTF8());
   1203     CHECK(re_test2.FullMatch(utf8_string));
   1204 
   1205     // Check that '.' matches one byte or UTF-8 character
   1206     // according to the mode.
   1207     string ss;
   1208     RE re_test3("(.)");
   1209     CHECK(re_test3.PartialMatch(utf8_string, &ss));
   1210     CHECK_EQ(ss, string("\xe6"));
   1211     RE re_test4("(.)", pcrecpp::UTF8());
   1212     CHECK(re_test4.PartialMatch(utf8_string, &ss));
   1213     CHECK_EQ(ss, string("\xe6\x97\xa5"));
   1214 
   1215     // Check that string matches itself in either mode
   1216     RE re_test5(utf8_string);
   1217     CHECK(re_test5.FullMatch(utf8_string));
   1218     RE re_test6(utf8_string, pcrecpp::UTF8());
   1219     CHECK(re_test6.FullMatch(utf8_string));
   1220 
   1221     // Check that pattern matches string only in UTF8 mode
   1222     RE re_test7(utf8_pattern);
   1223     CHECK(!re_test7.FullMatch(utf8_string));
   1224     RE re_test8(utf8_pattern, pcrecpp::UTF8());
   1225     CHECK(re_test8.FullMatch(utf8_string));
   1226   }
   1227 
   1228   // Check that ungreedy, UTF8 regular expressions don't match when they
   1229   // oughtn't -- see bug 82246.
   1230   {
   1231     // This code always worked.
   1232     const char* pattern = "\\w+X";
   1233     const string target = "a aX";
   1234     RE match_sentence(pattern);
   1235     RE match_sentence_re(pattern, pcrecpp::UTF8());
   1236 
   1237     CHECK(!match_sentence.FullMatch(target));
   1238     CHECK(!match_sentence_re.FullMatch(target));
   1239   }
   1240 
   1241   {
   1242     const char* pattern = "(?U)\\w+X";
   1243     const string target = "a aX";
   1244     RE match_sentence(pattern);
   1245     RE match_sentence_re(pattern, pcrecpp::UTF8());
   1246 
   1247     CHECK(!match_sentence.FullMatch(target));
   1248     CHECK(!match_sentence_re.FullMatch(target));
   1249   }
   1250 #endif  /* def SUPPORT_UTF8 */
   1251 
   1252   printf("Testing error reporting\n");
   1253 
   1254   { RE re("a\\1"); CHECK(!re.error().empty()); }
   1255   {
   1256     RE re("a[x");
   1257     CHECK(!re.error().empty());
   1258   }
   1259   {
   1260     RE re("a[z-a]");
   1261     CHECK(!re.error().empty());
   1262   }
   1263   {
   1264     RE re("a[[:foobar:]]");
   1265     CHECK(!re.error().empty());
   1266   }
   1267   {
   1268     RE re("a(b");
   1269     CHECK(!re.error().empty());
   1270   }
   1271   {
   1272     RE re("a\\");
   1273     CHECK(!re.error().empty());
   1274   }
   1275 
   1276   // Test that recursion is stopped
   1277   TestRecursion();
   1278 
   1279   // Test Options
   1280   if (getenv("VERBOSE_TEST") != NULL)
   1281     VERBOSE_TEST  = true;
   1282   TestOptions();
   1283 
   1284   // Test the constructors
   1285   TestConstructors();
   1286 
   1287   // Done
   1288   printf("OK\n");
   1289 
   1290   return 0;
   1291 }
   1292