Home | History | Annotate | Download | only in testing
      1 // -*- coding: utf-8 -*-
      2 // Copyright 2002-2009 The RE2 Authors.  All Rights Reserved.
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 // TODO: Test extractions for PartialMatch/Consume
      7 
      8 #include <sys/types.h>
      9 #ifndef WIN32
     10 #include <sys/mman.h>
     11 #endif
     12 #include <sys/stat.h>
     13 #include <errno.h>
     14 #include <vector>
     15 #include "util/test.h"
     16 #include "re2/re2.h"
     17 #include "re2/regexp.h"
     18 
     19 #ifdef WIN32
     20 #include <stdio.h>
     21 #define snprintf _snprintf
     22 #endif
     23 
     24 DECLARE_bool(logtostderr);
     25 
     26 namespace re2 {
     27 
     28 TEST(RE2, HexTests) {
     29 
     30   VLOG(1) << "hex tests";
     31 
     32 #define CHECK_HEX(type, value) \
     33   do { \
     34     type v; \
     35     CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
     36     CHECK_EQ(v, 0x ## value); \
     37     CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
     38     CHECK_EQ(v, 0x ## value); \
     39   } while(0)
     40 
     41   CHECK_HEX(short,              2bad);
     42   CHECK_HEX(unsigned short,     2badU);
     43   CHECK_HEX(int,                dead);
     44   CHECK_HEX(unsigned int,       deadU);
     45   CHECK_HEX(long,               7eadbeefL);
     46   CHECK_HEX(unsigned long,      deadbeefUL);
     47   CHECK_HEX(long long,          12345678deadbeefLL);
     48   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
     49 
     50 #undef CHECK_HEX
     51 }
     52 
     53 TEST(RE2, OctalTests) {
     54   VLOG(1) << "octal tests";
     55 
     56 #define CHECK_OCTAL(type, value) \
     57   do { \
     58     type v; \
     59     CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
     60     CHECK_EQ(v, 0 ## value); \
     61     CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
     62     CHECK_EQ(v, 0 ## value); \
     63   } while(0)
     64 
     65   CHECK_OCTAL(short,              77777);
     66   CHECK_OCTAL(unsigned short,     177777U);
     67   CHECK_OCTAL(int,                17777777777);
     68   CHECK_OCTAL(unsigned int,       37777777777U);
     69   CHECK_OCTAL(long,               17777777777L);
     70   CHECK_OCTAL(unsigned long,      37777777777UL);
     71   CHECK_OCTAL(long long,          777777777777777777777LL);
     72   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
     73 
     74 #undef CHECK_OCTAL
     75 }
     76 
     77 TEST(RE2, DecimalTests) {
     78   VLOG(1) << "decimal tests";
     79 
     80 #define CHECK_DECIMAL(type, value) \
     81   do { \
     82     type v; \
     83     CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
     84     CHECK_EQ(v, value); \
     85     CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
     86     CHECK_EQ(v, value); \
     87   } while(0)
     88 
     89   CHECK_DECIMAL(short,              -1);
     90   CHECK_DECIMAL(unsigned short,     9999);
     91   CHECK_DECIMAL(int,                -1000);
     92   CHECK_DECIMAL(unsigned int,       12345U);
     93   CHECK_DECIMAL(long,               -10000000L);
     94   CHECK_DECIMAL(unsigned long,      3083324652U);
     95   CHECK_DECIMAL(long long,          -100000000000000LL);
     96   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
     97 
     98 #undef CHECK_DECIMAL
     99 }
    100 
    101 TEST(RE2, Replace) {
    102   VLOG(1) << "TestReplace";
    103 
    104   struct ReplaceTest {
    105     const char *regexp;
    106     const char *rewrite;
    107     const char *original;
    108     const char *single;
    109     const char *global;
    110     int        greplace_count;
    111   };
    112   static const ReplaceTest tests[] = {
    113     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
    114       "\\2\\1ay",
    115       "the quick brown fox jumps over the lazy dogs.",
    116       "ethay quick brown fox jumps over the lazy dogs.",
    117       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
    118       9 },
    119     { "\\w+",
    120       "\\0-NOSPAM",
    121       "abcd.efghi (at) google.com",
    122       "abcd-NOSPAM.efghi (at) google.com",
    123       "abcd-NOSPAM.efghi-NOSPAM (at) google-NOSPAM.com-NOSPAM",
    124       4 },
    125     { "^",
    126       "(START)",
    127       "foo",
    128       "(START)foo",
    129       "(START)foo",
    130       1 },
    131     { "^",
    132       "(START)",
    133       "",
    134       "(START)",
    135       "(START)",
    136       1 },
    137     { "$",
    138       "(END)",
    139       "",
    140       "(END)",
    141       "(END)",
    142       1 },
    143     { "b",
    144       "bb",
    145       "ababababab",
    146       "abbabababab",
    147       "abbabbabbabbabb",
    148       5 },
    149     { "b",
    150       "bb",
    151       "bbbbbb",
    152       "bbbbbbb",
    153       "bbbbbbbbbbbb",
    154       6 },
    155     { "b+",
    156       "bb",
    157       "bbbbbb",
    158       "bb",
    159       "bb",
    160       1 },
    161     { "b*",
    162       "bb",
    163       "bbbbbb",
    164       "bb",
    165       "bb",
    166       1 },
    167     { "b*",
    168       "bb",
    169       "aaaaa",
    170       "bbaaaaa",
    171       "bbabbabbabbabbabb",
    172       6 },
    173     // Check newline handling
    174     { "a.*a",
    175       "(\\0)",
    176       "aba\naba",
    177       "(aba)\naba",
    178       "(aba)\n(aba)",
    179       2 },
    180     { "", NULL, NULL, NULL, NULL, 0 }
    181   };
    182 
    183   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
    184     VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
    185     string one(t->original);
    186     CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
    187     CHECK_EQ(one, t->single);
    188     string all(t->original);
    189     CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
    190       << "Got: " << all;
    191     CHECK_EQ(all, t->global);
    192   }
    193 }
    194 
    195 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
    196                               bool expect_ok) {
    197   string error;
    198   RE2 exp(regexp);
    199   bool actual_ok = exp.CheckRewriteString(rewrite, &error);
    200   EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
    201 }
    202 
    203 TEST(CheckRewriteString, all) {
    204   TestCheckRewriteString("abc", "foo", true);
    205   TestCheckRewriteString("abc", "foo\\", false);
    206   TestCheckRewriteString("abc", "foo\\0bar", true);
    207 
    208   TestCheckRewriteString("a(b)c", "foo", true);
    209   TestCheckRewriteString("a(b)c", "foo\\0bar", true);
    210   TestCheckRewriteString("a(b)c", "foo\\1bar", true);
    211   TestCheckRewriteString("a(b)c", "foo\\2bar", false);
    212   TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
    213 
    214   TestCheckRewriteString("a(b)(c)", "foo\\12", true);
    215   TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
    216   TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
    217 }
    218 
    219 TEST(RE2, Extract) {
    220   VLOG(1) << "TestExtract";
    221 
    222   string s;
    223 
    224   CHECK(RE2::Extract("boris (at) kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
    225   CHECK_EQ(s, "kremvax!boris");
    226 
    227   CHECK(RE2::Extract("foo", ".*", "'\\0'", &s));
    228   CHECK_EQ(s, "'foo'");
    229   // check that false match doesn't overwrite
    230   CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s));
    231   CHECK_EQ(s, "'foo'");
    232 }
    233 
    234 TEST(RE2, Consume) {
    235   VLOG(1) << "TestConsume";
    236 
    237   RE2 r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
    238   string word;
    239 
    240   string s("   aaa b!@#$@#$cccc");
    241   StringPiece input(s);
    242 
    243   CHECK(RE2::Consume(&input, r, &word));
    244   CHECK_EQ(word, "aaa") << " input: " << input;
    245   CHECK(RE2::Consume(&input, r, &word));
    246   CHECK_EQ(word, "b") << " input: " << input;
    247   CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input;
    248 }
    249 
    250 TEST(RE2, ConsumeN) {
    251   const string s(" one two three 4");
    252   StringPiece input(s);
    253 
    254   RE2::Arg argv[2];
    255   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    256 
    257   // 0 arg
    258   EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0));  // Skips "one".
    259 
    260   // 1 arg
    261   string word;
    262   argv[0] = &word;
    263   EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
    264   EXPECT_EQ("two", word);
    265 
    266   // Multi-args
    267   int n;
    268   argv[1] = &n;
    269   EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
    270   EXPECT_EQ("three", word);
    271   EXPECT_EQ(4, n);
    272 }
    273 
    274 TEST(RE2, FindAndConsume) {
    275   VLOG(1) << "TestFindAndConsume";
    276 
    277   RE2 r("(\\w+)");      // matches a word
    278   string word;
    279 
    280   string s("   aaa b!@#$@#$cccc");
    281   StringPiece input(s);
    282 
    283   CHECK(RE2::FindAndConsume(&input, r, &word));
    284   CHECK_EQ(word, "aaa");
    285   CHECK(RE2::FindAndConsume(&input, r, &word));
    286   CHECK_EQ(word, "b");
    287   CHECK(RE2::FindAndConsume(&input, r, &word));
    288   CHECK_EQ(word, "cccc");
    289   CHECK(! RE2::FindAndConsume(&input, r, &word));
    290 
    291   // Check that FindAndConsume works without any submatches.
    292   // Earlier version used uninitialized data for
    293   // length to consume.
    294   input = "aaa";
    295   CHECK(RE2::FindAndConsume(&input, "aaa"));
    296   CHECK_EQ(input, "");
    297 }
    298 
    299 TEST(RE2, FindAndConsumeN) {
    300   const string s(" one two three 4");
    301   StringPiece input(s);
    302 
    303   RE2::Arg argv[2];
    304   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    305 
    306   // 0 arg
    307   EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0));  // Skips "one".
    308 
    309   // 1 arg
    310   string word;
    311   argv[0] = &word;
    312   EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
    313   EXPECT_EQ("two", word);
    314 
    315   // Multi-args
    316   int n;
    317   argv[1] = &n;
    318   EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
    319   EXPECT_EQ("three", word);
    320   EXPECT_EQ(4, n);
    321 }
    322 
    323 TEST(RE2, MatchNumberPeculiarity) {
    324   VLOG(1) << "TestMatchNumberPeculiarity";
    325 
    326   RE2 r("(foo)|(bar)|(baz)");
    327   string word1;
    328   string word2;
    329   string word3;
    330 
    331   CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
    332   CHECK_EQ(word1, "foo");
    333   CHECK_EQ(word2, "");
    334   CHECK_EQ(word3, "");
    335   CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
    336   CHECK_EQ(word1, "");
    337   CHECK_EQ(word2, "bar");
    338   CHECK_EQ(word3, "");
    339   CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
    340   CHECK_EQ(word1, "");
    341   CHECK_EQ(word2, "");
    342   CHECK_EQ(word3, "baz");
    343   CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3));
    344 
    345   string a;
    346   CHECK(RE2::FullMatch("hello", "(foo)|hello", &a));
    347   CHECK_EQ(a, "");
    348 }
    349 
    350 TEST(RE2, Match) {
    351   RE2 re("((\\w+):([0-9]+))");   // extracts host and port
    352   StringPiece group[4];
    353 
    354   // No match.
    355   StringPiece s = "zyzzyva";
    356   CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED,
    357                   group, arraysize(group)));
    358 
    359   // Matches and extracts.
    360   s = "a chrisr:9000 here";
    361   CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED,
    362                  group, arraysize(group)));
    363   CHECK_EQ(group[0], "chrisr:9000");
    364   CHECK_EQ(group[1], "chrisr:9000");
    365   CHECK_EQ(group[2], "chrisr");
    366   CHECK_EQ(group[3], "9000");
    367 
    368   string all, host;
    369   int port;
    370   CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
    371   CHECK_EQ(all, "chrisr:9000");
    372   CHECK_EQ(host, "chrisr");
    373   CHECK_EQ(port, 9000);
    374 }
    375 
    376 static void TestRecursion(int size, const char *pattern) {
    377   // Fill up a string repeating the pattern given
    378   string domain;
    379   domain.resize(size);
    380   int patlen = strlen(pattern);
    381   for (int i = 0; i < size; ++i) {
    382     domain[i] = pattern[i % patlen];
    383   }
    384   // Just make sure it doesn't crash due to too much recursion.
    385   RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
    386   RE2::FullMatch(domain, re);
    387 }
    388 
    389 // A meta-quoted string, interpreted as a pattern, should always match
    390 // the original unquoted string.
    391 static void TestQuoteMeta(string unquoted,
    392                           const RE2::Options& options = RE2::DefaultOptions) {
    393   string quoted = RE2::QuoteMeta(unquoted);
    394   RE2 re(quoted, options);
    395   EXPECT_TRUE_M(RE2::FullMatch(unquoted, re),
    396                 "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
    397 }
    398 
    399 // A meta-quoted string, interpreted as a pattern, should always match
    400 // the original unquoted string.
    401 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
    402                                   const RE2::Options& options = RE2::DefaultOptions) {
    403   string quoted = RE2::QuoteMeta(unquoted);
    404   RE2 re(quoted, options);
    405   EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re),
    406                  "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
    407 }
    408 
    409 // Tests that quoted meta characters match their original strings,
    410 // and that a few things that shouldn't match indeed do not.
    411 TEST(QuoteMeta, Simple) {
    412   TestQuoteMeta("foo");
    413   TestQuoteMeta("foo.bar");
    414   TestQuoteMeta("foo\\.bar");
    415   TestQuoteMeta("[1-9]");
    416   TestQuoteMeta("1.5-2.0?");
    417   TestQuoteMeta("\\d");
    418   TestQuoteMeta("Who doesn't like ice cream?");
    419   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
    420   TestQuoteMeta("((?!)xxx).*yyy");
    421   TestQuoteMeta("([");
    422 }
    423 TEST(QuoteMeta, SimpleNegative) {
    424   NegativeTestQuoteMeta("foo", "bar");
    425   NegativeTestQuoteMeta("...", "bar");
    426   NegativeTestQuoteMeta("\\.", ".");
    427   NegativeTestQuoteMeta("\\.", "..");
    428   NegativeTestQuoteMeta("(a)", "a");
    429   NegativeTestQuoteMeta("(a|b)", "a");
    430   NegativeTestQuoteMeta("(a|b)", "(a)");
    431   NegativeTestQuoteMeta("(a|b)", "a|b");
    432   NegativeTestQuoteMeta("[0-9]", "0");
    433   NegativeTestQuoteMeta("[0-9]", "0-9");
    434   NegativeTestQuoteMeta("[0-9]", "[9]");
    435   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
    436 }
    437 
    438 TEST(QuoteMeta, Latin1) {
    439   TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
    440 }
    441 
    442 TEST(QuoteMeta, UTF8) {
    443   TestQuoteMeta("Plcido Domingo");
    444   TestQuoteMeta("xyz");  // No fancy utf8.
    445   TestQuoteMeta("\xc2\xb0");  // 2-byte utf8 -- a degree symbol.
    446   TestQuoteMeta("27\xc2\xb0 degrees");  // As a middle character.
    447   TestQuoteMeta("\xe2\x80\xb3");  // 3-byte utf8 -- a double prime.
    448   TestQuoteMeta("\xf0\x9d\x85\x9f");  // 4-byte utf8 -- a music note.
    449   TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, this should
    450                                 // still work.
    451   NegativeTestQuoteMeta("27\xc2\xb0",
    452                         "27\\\xc2\\\xb0");  // 2-byte utf8 -- a degree symbol.
    453 }
    454 
    455 TEST(QuoteMeta, HasNull) {
    456   string has_null;
    457 
    458   // string with one null character
    459   has_null += '\0';
    460   TestQuoteMeta(has_null);
    461   NegativeTestQuoteMeta(has_null, "");
    462 
    463   // Don't want null-followed-by-'1' to be interpreted as '\01'.
    464   has_null += '1';
    465   TestQuoteMeta(has_null);
    466   NegativeTestQuoteMeta(has_null, "\1");
    467 }
    468 
    469 TEST(ProgramSize, BigProgram) {
    470   RE2 re_simple("simple regexp");
    471   RE2 re_medium("medium.*regexp");
    472   RE2 re_complex("hard.{1,128}regexp");
    473 
    474   CHECK_GT(re_simple.ProgramSize(), 0);
    475   CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
    476   CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
    477 }
    478 
    479 // Issue 956519: handling empty character sets was
    480 // causing NULL dereference.  This tests a few empty character sets.
    481 // (The way to get an empty character set is to negate a full one.)
    482 TEST(EmptyCharset, Fuzz) {
    483   static const char *empties[] = {
    484     "[^\\S\\s]",
    485     "[^\\S[:space:]]",
    486     "[^\\D\\d]",
    487     "[^\\D[:digit:]]"
    488   };
    489   for (int i = 0; i < arraysize(empties); i++)
    490     CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
    491 }
    492 
    493 // Test that named groups work correctly.
    494 TEST(Capture, NamedGroups) {
    495   {
    496     RE2 re("(hello world)");
    497     CHECK_EQ(re.NumberOfCapturingGroups(), 1);
    498     const map<string, int>& m = re.NamedCapturingGroups();
    499     CHECK_EQ(m.size(), 0);
    500   }
    501 
    502   {
    503     RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
    504     CHECK_EQ(re.NumberOfCapturingGroups(), 6);
    505     const map<string, int>& m = re.NamedCapturingGroups();
    506     CHECK_EQ(m.size(), 4);
    507     CHECK_EQ(m.find("A")->second, 1);
    508     CHECK_EQ(m.find("B")->second, 2);
    509     CHECK_EQ(m.find("C")->second, 3);
    510     CHECK_EQ(m.find("D")->second, 6);  // $4 and $5 are anonymous
    511   }
    512 }
    513 
    514 TEST(RE2, FullMatchWithNoArgs) {
    515   CHECK(RE2::FullMatch("h", "h"));
    516   CHECK(RE2::FullMatch("hello", "hello"));
    517   CHECK(RE2::FullMatch("hello", "h.*o"));
    518   CHECK(!RE2::FullMatch("othello", "h.*o"));       // Must be anchored at front
    519   CHECK(!RE2::FullMatch("hello!", "h.*o"));        // Must be anchored at end
    520 }
    521 
    522 TEST(RE2, PartialMatch) {
    523   CHECK(RE2::PartialMatch("x", "x"));
    524   CHECK(RE2::PartialMatch("hello", "h.*o"));
    525   CHECK(RE2::PartialMatch("othello", "h.*o"));
    526   CHECK(RE2::PartialMatch("hello!", "h.*o"));
    527   CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
    528 }
    529 
    530 TEST(RE2, PartialMatchN) {
    531   RE2::Arg argv[2];
    532   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    533 
    534   // 0 arg
    535   EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
    536   EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
    537 
    538   // 1 arg
    539   int i;
    540   argv[0] = &i;
    541   EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
    542   EXPECT_EQ(1001, i);
    543   EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
    544 
    545   // Multi-arg
    546   string s;
    547   argv[1] = &s;
    548   EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
    549   EXPECT_EQ(42, i);
    550   EXPECT_EQ("life", s);
    551   EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
    552 }
    553 
    554 TEST(RE2, FullMatchZeroArg) {
    555   // Zero-arg
    556   CHECK(RE2::FullMatch("1001", "\\d+"));
    557 }
    558 
    559 TEST(RE2, FullMatchOneArg) {
    560   int i;
    561 
    562   // Single-arg
    563   CHECK(RE2::FullMatch("1001", "(\\d+)",   &i));
    564   CHECK_EQ(i, 1001);
    565   CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i));
    566   CHECK_EQ(i, -123);
    567   CHECK(!RE2::FullMatch("10", "()\\d+", &i));
    568   CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890",
    569                        "(\\d+)", &i));
    570 }
    571 
    572 TEST(RE2, FullMatchIntegerArg) {
    573   int i;
    574 
    575   // Digits surrounding integer-arg
    576   CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i));
    577   CHECK_EQ(i, 23);
    578   CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i));
    579   CHECK_EQ(i, 1);
    580   CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
    581   CHECK_EQ(i, -1);
    582   CHECK(RE2::PartialMatch("1234", "(\\d)", &i));
    583   CHECK_EQ(i, 1);
    584   CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i));
    585   CHECK_EQ(i, -1);
    586 }
    587 
    588 TEST(RE2, FullMatchStringArg) {
    589   string s;
    590   // String-arg
    591   CHECK(RE2::FullMatch("hello", "h(.*)o", &s));
    592   CHECK_EQ(s, string("ell"));
    593 }
    594 
    595 TEST(RE2, FullMatchStringPieceArg) {
    596   int i;
    597   // StringPiece-arg
    598   StringPiece sp;
    599   CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
    600   CHECK_EQ(sp.size(), 4);
    601   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
    602   CHECK_EQ(i, 1234);
    603 }
    604 
    605 TEST(RE2, FullMatchMultiArg) {
    606   int i;
    607   string s;
    608   // Multi-arg
    609   CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
    610   CHECK_EQ(s, string("ruby"));
    611   CHECK_EQ(i, 1234);
    612 }
    613 
    614 TEST(RE2, FullMatchN) {
    615   RE2::Arg argv[2];
    616   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    617 
    618   // 0 arg
    619   EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
    620   EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
    621 
    622   // 1 arg
    623   int i;
    624   argv[0] = &i;
    625   EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
    626   EXPECT_EQ(1001, i);
    627   EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
    628 
    629   // Multi-arg
    630   string s;
    631   argv[1] = &s;
    632   EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
    633   EXPECT_EQ(42, i);
    634   EXPECT_EQ("life", s);
    635   EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
    636 }
    637 
    638 TEST(RE2, FullMatchIgnoredArg) {
    639   int i;
    640   string s;
    641   // Ignored arg
    642   CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
    643   CHECK_EQ(s, string("ruby"));
    644   CHECK_EQ(i, 1234);
    645 }
    646 
    647 TEST(RE2, FullMatchTypedNullArg) {
    648   string s;
    649 
    650   // Ignore non-void* NULL arg
    651   CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
    652   CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
    653   CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
    654   CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL));
    655   CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
    656   CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
    657   CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
    658 
    659   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
    660   CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
    661   CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
    662   CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
    663   CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
    664   CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
    665 }
    666 
    667 #ifndef WIN32
    668 // Check that numeric parsing code does not read past the end of
    669 // the number being parsed.
    670 TEST(RE2, NULTerminated) {
    671   char *v;
    672   int x;
    673   long pagesize = sysconf(_SC_PAGE_SIZE);
    674 
    675 #ifndef MAP_ANONYMOUS
    676 #define MAP_ANONYMOUS MAP_ANON
    677 #endif
    678   v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
    679                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
    680   CHECK(v != reinterpret_cast<char*>(-1));
    681   LOG(INFO) << "Memory at " << (void*)v;
    682   CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
    683   v[pagesize - 1] = '1';
    684 
    685   x = 0;
    686   CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
    687   CHECK_EQ(x, 1);
    688 }
    689 #endif
    690 
    691 TEST(RE2, FullMatchTypeTests) {
    692   // Type tests
    693   string zeros(100, '0');
    694   {
    695     char c;
    696     CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
    697     CHECK_EQ(c, 'H');
    698   }
    699   {
    700     unsigned char c;
    701     CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
    702     CHECK_EQ(c, static_cast<unsigned char>('H'));
    703   }
    704   {
    705     int16 v;
    706     CHECK(RE2::FullMatch("100",     "(-?\\d+)", &v));    CHECK_EQ(v, 100);
    707     CHECK(RE2::FullMatch("-100",    "(-?\\d+)", &v));    CHECK_EQ(v, -100);
    708     CHECK(RE2::FullMatch("32767",   "(-?\\d+)", &v));    CHECK_EQ(v, 32767);
    709     CHECK(RE2::FullMatch("-32768",  "(-?\\d+)", &v));    CHECK_EQ(v, -32768);
    710     CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v));
    711     CHECK(!RE2::FullMatch("32768",  "(-?\\d+)", &v));
    712   }
    713   {
    714     uint16 v;
    715     CHECK(RE2::FullMatch("100",     "(\\d+)", &v));    CHECK_EQ(v, 100);
    716     CHECK(RE2::FullMatch("32767",   "(\\d+)", &v));    CHECK_EQ(v, 32767);
    717     CHECK(RE2::FullMatch("65535",   "(\\d+)", &v));    CHECK_EQ(v, 65535);
    718     CHECK(!RE2::FullMatch("65536",  "(\\d+)", &v));
    719   }
    720   {
    721     int32 v;
    722     static const int32 max = 0x7fffffff;
    723     static const int32 min = -max - 1;
    724     CHECK(RE2::FullMatch("100",          "(-?\\d+)", &v)); CHECK_EQ(v, 100);
    725     CHECK(RE2::FullMatch("-100",         "(-?\\d+)", &v)); CHECK_EQ(v, -100);
    726     CHECK(RE2::FullMatch("2147483647",   "(-?\\d+)", &v)); CHECK_EQ(v, max);
    727     CHECK(RE2::FullMatch("-2147483648",  "(-?\\d+)", &v)); CHECK_EQ(v, min);
    728     CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
    729     CHECK(!RE2::FullMatch("2147483648",  "(-?\\d+)", &v));
    730 
    731     CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
    732     CHECK_EQ(v, max);
    733     CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
    734     CHECK_EQ(v, min);
    735 
    736     CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
    737     CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
    738     CHECK_EQ(v, max);
    739     CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
    740   }
    741   {
    742     uint32 v;
    743     static const uint32 max = 0xfffffffful;
    744     CHECK(RE2::FullMatch("100",         "(\\d+)", &v)); CHECK_EQ(v, 100);
    745     CHECK(RE2::FullMatch("4294967295",  "(\\d+)", &v)); CHECK_EQ(v, max);
    746     CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v));
    747     CHECK(!RE2::FullMatch("-1",         "(\\d+)", &v));
    748 
    749     CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
    750   }
    751   {
    752     int64 v;
    753     static const int64 max = 0x7fffffffffffffffull;
    754     static const int64 min = -max - 1;
    755     char buf[32];
    756 
    757     CHECK(RE2::FullMatch("100",  "(-?\\d+)", &v)); CHECK_EQ(v, 100);
    758     CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
    759 
    760     snprintf(buf, sizeof(buf), "%lld", (long long int)max);
    761     CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, max);
    762 
    763     snprintf(buf, sizeof(buf), "%lld", (long long int)min);
    764     CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, min);
    765 
    766     snprintf(buf, sizeof(buf), "%lld", (long long int)max);
    767     assert(buf[strlen(buf)-1] != '9');
    768     buf[strlen(buf)-1]++;
    769     CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
    770 
    771     snprintf(buf, sizeof(buf), "%lld", (long long int)min);
    772     assert(buf[strlen(buf)-1] != '9');
    773     buf[strlen(buf)-1]++;
    774     CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
    775   }
    776   {
    777     uint64 v;
    778     int64 v2;
    779     static const uint64 max = 0xffffffffffffffffull;
    780     char buf[32];
    781 
    782     CHECK(RE2::FullMatch("100",  "(-?\\d+)", &v));  CHECK_EQ(v, 100);
    783     CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100);
    784 
    785     snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max);
    786     CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, max);
    787 
    788     assert(buf[strlen(buf)-1] != '9');
    789     buf[strlen(buf)-1]++;
    790     CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
    791   }
    792 }
    793 
    794 TEST(RE2, FloatingPointFullMatchTypes) {
    795   string zeros(100, '0');
    796   {
    797     float v;
    798     CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
    799     CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
    800     CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, float(1e23));
    801 
    802     CHECK(RE2::FullMatch(zeros + "1e23",  "(.*)", &v));
    803     CHECK_EQ(v, float(1e23));
    804 
    805     // 6700000000081920.1 is an edge case.
    806     // 6700000000081920 is exactly halfway between
    807     // two float32s, so the .1 should make it round up.
    808     // However, the .1 is outside the precision possible with
    809     // a float64: the nearest float64 is 6700000000081920.
    810     // So if the code uses strtod and then converts to float32,
    811     // round-to-even will make it round down instead of up.
    812     // To pass the test, the parser must call strtof directly.
    813     // This test case is carefully chosen to use only a 17-digit
    814     // number, since C does not guarantee to get the correctly
    815     // rounded answer for strtod and strtof unless the input is
    816     // short.
    817     CHECK(RE2::FullMatch("0.1", "(.*)", &v));
    818     CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
    819     CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
    820     CHECK_EQ(v, 6700000000081920.1f)
    821       << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
    822   }
    823   {
    824     double v;
    825     CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
    826     CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
    827     CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, 1e23);
    828     CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
    829     CHECK_EQ(v, double(1e23));
    830 
    831     CHECK(RE2::FullMatch("0.1", "(.*)", &v));
    832     CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
    833     CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
    834     CHECK_EQ(v, 1.0000000596046448)
    835       << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
    836   }
    837 }
    838 
    839 TEST(RE2, FullMatchAnchored) {
    840   int i;
    841   // Check that matching is fully anchored
    842   CHECK(!RE2::FullMatch("x1001", "(\\d+)",  &i));
    843   CHECK(!RE2::FullMatch("1001x", "(\\d+)",  &i));
    844   CHECK(RE2::FullMatch("x1001",  "x(\\d+)", &i)); CHECK_EQ(i, 1001);
    845   CHECK(RE2::FullMatch("1001x",  "(\\d+)x", &i)); CHECK_EQ(i, 1001);
    846 }
    847 
    848 TEST(RE2, FullMatchBraces) {
    849   // Braces
    850   CHECK(RE2::FullMatch("0abcd",  "[0-9a-f+.-]{5,}"));
    851   CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
    852   CHECK(!RE2::FullMatch("0abc",  "[0-9a-f+.-]{5,}"));
    853 }
    854 
    855 TEST(RE2, Complicated) {
    856   // Complicated RE2
    857   CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
    858   CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
    859   CHECK(RE2::FullMatch("X",   "foo|bar|[A-Z]"));
    860   CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]"));
    861 }
    862 
    863 TEST(RE2, FullMatchEnd) {
    864   // Check full-match handling (needs '$' tacked on internally)
    865   CHECK(RE2::FullMatch("fo", "fo|foo"));
    866   CHECK(RE2::FullMatch("foo", "fo|foo"));
    867   CHECK(RE2::FullMatch("fo", "fo|foo$"));
    868   CHECK(RE2::FullMatch("foo", "fo|foo$"));
    869   CHECK(RE2::FullMatch("foo", "foo$"));
    870   CHECK(!RE2::FullMatch("foo$bar", "foo\\$"));
    871   CHECK(!RE2::FullMatch("fox", "fo|bar"));
    872 
    873   // Uncomment the following if we change the handling of '$' to
    874   // prevent it from matching a trailing newline
    875   if (false) {
    876     // Check that we don't get bitten by pcre's special handling of a
    877     // '\n' at the end of the string matching '$'
    878     CHECK(!RE2::PartialMatch("foo\n", "foo$"));
    879   }
    880 }
    881 
    882 TEST(RE2, FullMatchArgCount) {
    883   // Number of args
    884   int a[16];
    885   CHECK(RE2::FullMatch("", ""));
    886 
    887   memset(a, 0, sizeof(0));
    888   CHECK(RE2::FullMatch("1",
    889                       "(\\d){1}",
    890                       &a[0]));
    891   CHECK_EQ(a[0], 1);
    892 
    893   memset(a, 0, sizeof(0));
    894   CHECK(RE2::FullMatch("12",
    895                       "(\\d)(\\d)",
    896                       &a[0],  &a[1]));
    897   CHECK_EQ(a[0], 1);
    898   CHECK_EQ(a[1], 2);
    899 
    900   memset(a, 0, sizeof(0));
    901   CHECK(RE2::FullMatch("123",
    902                       "(\\d)(\\d)(\\d)",
    903                       &a[0],  &a[1],  &a[2]));
    904   CHECK_EQ(a[0], 1);
    905   CHECK_EQ(a[1], 2);
    906   CHECK_EQ(a[2], 3);
    907 
    908   memset(a, 0, sizeof(0));
    909   CHECK(RE2::FullMatch("1234",
    910                       "(\\d)(\\d)(\\d)(\\d)",
    911                       &a[0],  &a[1],  &a[2],  &a[3]));
    912   CHECK_EQ(a[0], 1);
    913   CHECK_EQ(a[1], 2);
    914   CHECK_EQ(a[2], 3);
    915   CHECK_EQ(a[3], 4);
    916 
    917   memset(a, 0, sizeof(0));
    918   CHECK(RE2::FullMatch("12345",
    919                       "(\\d)(\\d)(\\d)(\\d)(\\d)",
    920                       &a[0],  &a[1],  &a[2],  &a[3],
    921                       &a[4]));
    922   CHECK_EQ(a[0], 1);
    923   CHECK_EQ(a[1], 2);
    924   CHECK_EQ(a[2], 3);
    925   CHECK_EQ(a[3], 4);
    926   CHECK_EQ(a[4], 5);
    927 
    928   memset(a, 0, sizeof(0));
    929   CHECK(RE2::FullMatch("123456",
    930                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
    931                       &a[0],  &a[1],  &a[2],  &a[3],
    932                       &a[4],  &a[5]));
    933   CHECK_EQ(a[0], 1);
    934   CHECK_EQ(a[1], 2);
    935   CHECK_EQ(a[2], 3);
    936   CHECK_EQ(a[3], 4);
    937   CHECK_EQ(a[4], 5);
    938   CHECK_EQ(a[5], 6);
    939 
    940   memset(a, 0, sizeof(0));
    941   CHECK(RE2::FullMatch("1234567",
    942                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
    943                       &a[0],  &a[1],  &a[2],  &a[3],
    944                       &a[4],  &a[5],  &a[6]));
    945   CHECK_EQ(a[0], 1);
    946   CHECK_EQ(a[1], 2);
    947   CHECK_EQ(a[2], 3);
    948   CHECK_EQ(a[3], 4);
    949   CHECK_EQ(a[4], 5);
    950   CHECK_EQ(a[5], 6);
    951   CHECK_EQ(a[6], 7);
    952 
    953   memset(a, 0, sizeof(0));
    954   CHECK(RE2::FullMatch("1234567890123456",
    955                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
    956                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
    957                       &a[0],  &a[1],  &a[2],  &a[3],
    958                       &a[4],  &a[5],  &a[6],  &a[7],
    959                       &a[8],  &a[9],  &a[10], &a[11],
    960                       &a[12], &a[13], &a[14], &a[15]));
    961   CHECK_EQ(a[0], 1);
    962   CHECK_EQ(a[1], 2);
    963   CHECK_EQ(a[2], 3);
    964   CHECK_EQ(a[3], 4);
    965   CHECK_EQ(a[4], 5);
    966   CHECK_EQ(a[5], 6);
    967   CHECK_EQ(a[6], 7);
    968   CHECK_EQ(a[7], 8);
    969   CHECK_EQ(a[8], 9);
    970   CHECK_EQ(a[9], 0);
    971   CHECK_EQ(a[10], 1);
    972   CHECK_EQ(a[11], 2);
    973   CHECK_EQ(a[12], 3);
    974   CHECK_EQ(a[13], 4);
    975   CHECK_EQ(a[14], 5);
    976   CHECK_EQ(a[15], 6);
    977 }
    978 
    979 TEST(RE2, Accessors) {
    980   // Check the pattern() accessor
    981   {
    982     const string kPattern = "http://([^/]+)/.*";
    983     const RE2 re(kPattern);
    984     CHECK_EQ(kPattern, re.pattern());
    985   }
    986 
    987   // Check RE2 error field.
    988   {
    989     RE2 re("foo");
    990     CHECK(re.error().empty());  // Must have no error
    991     CHECK(re.ok());
    992     CHECK(re.error_code() == RE2::NoError);
    993   }
    994 }
    995 
    996 TEST(RE2, UTF8) {
    997   // Check UTF-8 handling
    998   // Three Japanese characters (nihongo)
    999   const char utf8_string[] = {
   1000        0xe6, 0x97, 0xa5, // 65e5
   1001        0xe6, 0x9c, 0xac, // 627c
   1002        0xe8, 0xaa, 0x9e, // 8a9e
   1003        0
   1004   };
   1005   const char utf8_pattern[] = {
   1006        '.',
   1007        0xe6, 0x9c, 0xac, // 627c
   1008        '.',
   1009        0
   1010   };
   1011 
   1012   // Both should match in either mode, bytes or UTF-8
   1013   RE2 re_test1(".........", RE2::Latin1);
   1014   CHECK(RE2::FullMatch(utf8_string, re_test1));
   1015   RE2 re_test2("...");
   1016   CHECK(RE2::FullMatch(utf8_string, re_test2));
   1017 
   1018   // Check that '.' matches one byte or UTF-8 character
   1019   // according to the mode.
   1020   string s;
   1021   RE2 re_test3("(.)", RE2::Latin1);
   1022   CHECK(RE2::PartialMatch(utf8_string, re_test3, &s));
   1023   CHECK_EQ(s, string("\xe6"));
   1024   RE2 re_test4("(.)");
   1025   CHECK(RE2::PartialMatch(utf8_string, re_test4, &s));
   1026   CHECK_EQ(s, string("\xe6\x97\xa5"));
   1027 
   1028   // Check that string matches itself in either mode
   1029   RE2 re_test5(utf8_string, RE2::Latin1);
   1030   CHECK(RE2::FullMatch(utf8_string, re_test5));
   1031   RE2 re_test6(utf8_string);
   1032   CHECK(RE2::FullMatch(utf8_string, re_test6));
   1033 
   1034   // Check that pattern matches string only in UTF8 mode
   1035   RE2 re_test7(utf8_pattern, RE2::Latin1);
   1036   CHECK(!RE2::FullMatch(utf8_string, re_test7));
   1037   RE2 re_test8(utf8_pattern);
   1038   CHECK(RE2::FullMatch(utf8_string, re_test8));
   1039 }
   1040 
   1041 TEST(RE2, UngreedyUTF8) {
   1042   // Check that ungreedy, UTF8 regular expressions don't match when they
   1043   // oughtn't -- see bug 82246.
   1044   {
   1045     // This code always worked.
   1046     const char* pattern = "\\w+X";
   1047     const string target = "a aX";
   1048     RE2 match_sentence(pattern, RE2::Latin1);
   1049     RE2 match_sentence_re(pattern);
   1050 
   1051     CHECK(!RE2::FullMatch(target, match_sentence));
   1052     CHECK(!RE2::FullMatch(target, match_sentence_re));
   1053   }
   1054   {
   1055     const char* pattern = "(?U)\\w+X";
   1056     const string target = "a aX";
   1057     RE2 match_sentence(pattern, RE2::Latin1);
   1058     CHECK_EQ(match_sentence.error(), "");
   1059     RE2 match_sentence_re(pattern);
   1060 
   1061     CHECK(!RE2::FullMatch(target, match_sentence));
   1062     CHECK(!RE2::FullMatch(target, match_sentence_re));
   1063   }
   1064 }
   1065 
   1066 TEST(RE2, Rejects) {
   1067   { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); }
   1068   {
   1069     RE2 re("a[x", RE2::Quiet);
   1070     CHECK(!re.ok());
   1071   }
   1072   {
   1073     RE2 re("a[z-a]", RE2::Quiet);
   1074     CHECK(!re.ok());
   1075   }
   1076   {
   1077     RE2 re("a[[:foobar:]]", RE2::Quiet);
   1078     CHECK(!re.ok());
   1079   }
   1080   {
   1081     RE2 re("a(b", RE2::Quiet);
   1082     CHECK(!re.ok());
   1083   }
   1084   {
   1085     RE2 re("a\\", RE2::Quiet);
   1086     CHECK(!re.ok());
   1087   }
   1088 }
   1089 
   1090 TEST(RE2, NoCrash) {
   1091   // Test that using a bad regexp doesn't crash.
   1092   {
   1093     RE2 re("a\\", RE2::Quiet);
   1094     CHECK(!re.ok());
   1095     CHECK(!RE2::PartialMatch("a\\b", re));
   1096   }
   1097 
   1098   // Test that using an enormous regexp doesn't crash
   1099   {
   1100     RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
   1101     CHECK(!re.ok());
   1102     CHECK(!RE2::PartialMatch("aaa", re));
   1103   }
   1104 
   1105   // Test that a crazy regexp still compiles and runs.
   1106   {
   1107     RE2 re(".{512}x", RE2::Quiet);
   1108     CHECK(re.ok());
   1109     string s;
   1110     s.append(515, 'c');
   1111     s.append("x");
   1112     CHECK(RE2::PartialMatch(s, re));
   1113   }
   1114 }
   1115 
   1116 TEST(RE2, Recursion) {
   1117   // Test that recursion is stopped.
   1118   // This test is PCRE-legacy -- there's no recursion in RE2.
   1119   int bytes = 15 * 1024;  // enough to crash PCRE
   1120   TestRecursion(bytes, ".");
   1121   TestRecursion(bytes, "a");
   1122   TestRecursion(bytes, "a.");
   1123   TestRecursion(bytes, "ab.");
   1124   TestRecursion(bytes, "abc.");
   1125 }
   1126 
   1127 TEST(RE2, BigCountedRepetition) {
   1128   // Test that counted repetition works, given tons of memory.
   1129   RE2::Options opt;
   1130   opt.set_max_mem(256<<20);
   1131 
   1132   RE2 re(".{512}x", opt);
   1133   CHECK(re.ok());
   1134   string s;
   1135   s.append(515, 'c');
   1136   s.append("x");
   1137   CHECK(RE2::PartialMatch(s, re));
   1138 }
   1139 
   1140 TEST(RE2, DeepRecursion) {
   1141   // Test for deep stack recursion.  This would fail with a
   1142   // segmentation violation due to stack overflow before pcre was
   1143   // patched.
   1144   // Again, a PCRE legacy test.  RE2 doesn't recurse.
   1145   string comment("x*");
   1146   string a(131072, 'a');
   1147   comment += a;
   1148   comment += "*x";
   1149   RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
   1150   CHECK(RE2::FullMatch(comment, re));
   1151 }
   1152 
   1153 // Suggested by Josh Hyman.  Failed when SearchOnePass was
   1154 // not implementing case-folding.
   1155 TEST(CaseInsensitive, MatchAndConsume) {
   1156   string result;
   1157   string text = "A fish named *Wanda*";
   1158   StringPiece sp(text);
   1159 
   1160   EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
   1161   EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
   1162 }
   1163 
   1164 // RE2 should permit implicit conversions from string, StringPiece, const char*,
   1165 // and C string literals.
   1166 TEST(RE2, ImplicitConversions) {
   1167   string re_string(".");
   1168   StringPiece re_stringpiece(".");
   1169   const char* re_cstring = ".";
   1170   EXPECT_TRUE(RE2::PartialMatch("e", re_string));
   1171   EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
   1172   EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
   1173   EXPECT_TRUE(RE2::PartialMatch("e", "."));
   1174 }
   1175 
   1176 // Bugs introduced by 8622304
   1177 TEST(RE2, CL8622304) {
   1178   // reported by ingow
   1179   string dir;
   1180   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])"));  // ok
   1181   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir));  // fails
   1182 
   1183   // reported by jacobsa
   1184   string key, val;
   1185   EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
   1186               "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
   1187               &key,
   1188               &val));
   1189   EXPECT_EQ(key, "bar");
   1190   EXPECT_EQ(val, "1,0x2F,030,4,5");
   1191 }
   1192 
   1193 
   1194 // Check that RE2 returns correct regexp pieces on error.
   1195 // In particular, make sure it returns whole runes
   1196 // and that it always reports invalid UTF-8.
   1197 // Also check that Perl error flag piece is big enough.
   1198 static struct ErrorTest {
   1199   const char *regexp;
   1200   const char *error;
   1201 } error_tests[] = {
   1202   { "ab\\cd", "\\" },
   1203   { "ef\\x01", "\\x0" },
   1204   { "gh\\x101", "\\x1" },
   1205   { "ij\\x1", "\\x1" },
   1206   { "kl\\x", "\\x" },
   1207   { "uv\\x{0000}", "\\x{0000" },
   1208   { "wx\\p{ABC", "\\p{ABC" },
   1209   { "yz(?smiUX:abc)", "(?smiUX" },   // used to return (?s but the error is X
   1210   { "aa(?smi", "(?sm" },
   1211   { "bb[abc", "[abc" },
   1212 
   1213   { "mn\\x1\377", "" },  // no argument string returned for invalid UTF-8
   1214   { "op\377qr", "" },
   1215   { "st\\x{00000\377", "" },
   1216   { "zz\\p{\377}", "" },
   1217   { "zz\\x{00\377}", "" },
   1218   { "zz(?P<name\377>abc)", "" },
   1219 };
   1220 TEST(RE2, ErrorArgs) {
   1221   for (int i = 0; i < arraysize(error_tests); i++) {
   1222     RE2 re(error_tests[i].regexp, RE2::Quiet);
   1223     EXPECT_FALSE(re.ok());
   1224     EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
   1225   }
   1226 }
   1227 
   1228 // Check that "never match \n" mode never matches \n.
   1229 static struct NeverTest {
   1230   const char* regexp;
   1231   const char* text;
   1232   const char* match;
   1233 } never_tests[] = {
   1234   { "(.*)", "abc\ndef\nghi\n", "abc" },
   1235   { "(?s)(abc.*def)", "abc\ndef\n", NULL },
   1236   { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
   1237   { "(abc[^x]*def)", "abc\ndef\n", NULL },
   1238   { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
   1239 };
   1240 TEST(RE2, NeverNewline) {
   1241   RE2::Options opt;
   1242   opt.set_never_nl(true);
   1243   for (int i = 0; i < arraysize(never_tests); i++) {
   1244     const NeverTest& t = never_tests[i];
   1245     RE2 re(t.regexp, opt);
   1246     if (t.match == NULL) {
   1247       EXPECT_FALSE(re.PartialMatch(t.text, re));
   1248     } else {
   1249       StringPiece m;
   1250       EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
   1251       EXPECT_EQ(m, t.match);
   1252     }
   1253   }
   1254 }
   1255 
   1256 // Check that there are no capturing groups in "never capture" mode.
   1257 TEST(RE2, NeverCapture) {
   1258   RE2::Options opt;
   1259   opt.set_never_capture(true);
   1260   RE2 re("(r)(e)", opt);
   1261   EXPECT_EQ(0, re.NumberOfCapturingGroups());
   1262 }
   1263 
   1264 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
   1265 // Triggered by a failed DFA search falling back to Bitstate when
   1266 // using Match with a NULL submatch set.  Bitstate tried to read
   1267 // the submatch[0] entry even if nsubmatch was 0.
   1268 TEST(RE2, BitstateCaptureBug) {
   1269   RE2::Options opt;
   1270   opt.set_max_mem(20000);
   1271   RE2 re("(_________$)", opt);
   1272   StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
   1273   EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
   1274 }
   1275 
   1276 // C++ version of bug 609710.
   1277 TEST(RE2, UnicodeClasses) {
   1278   const string str = "ABCDEFGHI";
   1279   string a, b, c;
   1280 
   1281   EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
   1282   EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
   1283   EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
   1284   EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
   1285   EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
   1286   EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
   1287 
   1288   EXPECT_TRUE(RE2::FullMatch("", "\\p{L}"));
   1289   EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}"));
   1290   EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}"));
   1291   EXPECT_FALSE(RE2::FullMatch("", "\\P{L}"));
   1292   EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}"));
   1293   EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}"));
   1294 
   1295   EXPECT_TRUE(RE2::FullMatch("", "\\p{L}"));
   1296   EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}"));
   1297   EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}"));
   1298   EXPECT_FALSE(RE2::FullMatch("", "\\P{L}"));
   1299   EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}"));
   1300   EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}"));
   1301 
   1302   EXPECT_TRUE(RE2::FullMatch("", "\\p{L}"));
   1303   EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}"));
   1304   EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}"));
   1305   EXPECT_FALSE(RE2::FullMatch("", "\\P{L}"));
   1306   EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}"));
   1307   EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}"));
   1308 
   1309   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
   1310   EXPECT_EQ("A", a);
   1311   EXPECT_EQ("B", b);
   1312   EXPECT_EQ("C", c);
   1313 
   1314   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
   1315   EXPECT_EQ("A", a);
   1316   EXPECT_EQ("B", b);
   1317   EXPECT_EQ("C", c);
   1318 
   1319   EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
   1320 
   1321   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
   1322   EXPECT_EQ("A", a);
   1323   EXPECT_EQ("B", b);
   1324   EXPECT_EQ("C", c);
   1325 
   1326   EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
   1327 
   1328   EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
   1329   EXPECT_EQ("", a);
   1330   EXPECT_EQ("", b);
   1331   EXPECT_EQ("", c);
   1332 }
   1333 
   1334 // Bug reported by saito. 2009/02/17
   1335 TEST(RE2, NullVsEmptyString) {
   1336   RE2 re2(".*");
   1337   StringPiece v1("");
   1338   EXPECT_TRUE(RE2::FullMatch(v1, re2));
   1339 
   1340   StringPiece v2;
   1341   EXPECT_TRUE(RE2::FullMatch(v2, re2));
   1342 }
   1343 
   1344 // Issue 1816809
   1345 TEST(RE2, Bug1816809) {
   1346   RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
   1347   StringPiece piece("llx-3;llx4");
   1348   string x;
   1349   EXPECT_TRUE(RE2::Consume(&piece, re, &x));
   1350 }
   1351 
   1352 // Issue 3061120
   1353 TEST(RE2, Bug3061120) {
   1354   RE2 re("(?i)\\W");
   1355   EXPECT_FALSE(RE2::PartialMatch("x", re));  // always worked
   1356   EXPECT_FALSE(RE2::PartialMatch("k", re));  // broke because of kelvin
   1357   EXPECT_FALSE(RE2::PartialMatch("s", re));  // broke because of latin long s
   1358 }
   1359 
   1360 TEST(RE2, CapturingGroupNames) {
   1361   // Opening parentheses annotated with group IDs:
   1362   //      12    3        45   6         7
   1363   RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
   1364   EXPECT_TRUE(re.ok());
   1365   const map<int, string>& have = re.CapturingGroupNames();
   1366   map<int, string> want;
   1367   want[3] = "G2";
   1368   want[6] = "G2";
   1369   want[7] = "G1";
   1370   EXPECT_EQ(want, have);
   1371 }
   1372 
   1373 TEST(RE2, RegexpToStringLossOfAnchor) {
   1374   EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
   1375   EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
   1376   EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
   1377   EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
   1378 }
   1379 
   1380 }  // namespace re2
   1381