Home | History | Annotate | Download | only in testing
      1 // -*- coding: utf-8 -*-
      2 // Copyright 2002-2009 The RE2 Authors.  All Rights Reserved.
      3 // Use of this source code is governed by a BSD-style
      4 // license that can be found in the LICENSE file.
      5 
      6 // TODO: Test extractions for PartialMatch/Consume
      7 
      8 #include <sys/types.h>
      9 #include <sys/mman.h>
     10 #include <sys/stat.h>
     11 #include <errno.h>
     12 #include <vector>
     13 #include "util/test.h"
     14 #include "re2/re2.h"
     15 #include "re2/regexp.h"
     16 
     17 DECLARE_bool(logtostderr);
     18 
     19 namespace re2 {
     20 
     21 TEST(RE2, HexTests) {
     22 
     23   VLOG(1) << "hex tests";
     24 
     25 #define CHECK_HEX(type, value) \
     26   do { \
     27     type v; \
     28     CHECK(RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
     29     CHECK_EQ(v, 0x ## value); \
     30     CHECK(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
     31     CHECK_EQ(v, 0x ## value); \
     32   } while(0)
     33 
     34   CHECK_HEX(short,              2bad);
     35   CHECK_HEX(unsigned short,     2badU);
     36   CHECK_HEX(int,                dead);
     37   CHECK_HEX(unsigned int,       deadU);
     38   CHECK_HEX(long,               7eadbeefL);
     39   CHECK_HEX(unsigned long,      deadbeefUL);
     40   CHECK_HEX(long long,          12345678deadbeefLL);
     41   CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
     42 
     43 #undef CHECK_HEX
     44 }
     45 
     46 TEST(RE2, OctalTests) {
     47   VLOG(1) << "octal tests";
     48 
     49 #define CHECK_OCTAL(type, value) \
     50   do { \
     51     type v; \
     52     CHECK(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
     53     CHECK_EQ(v, 0 ## value); \
     54     CHECK(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
     55     CHECK_EQ(v, 0 ## value); \
     56   } while(0)
     57 
     58   CHECK_OCTAL(short,              77777);
     59   CHECK_OCTAL(unsigned short,     177777U);
     60   CHECK_OCTAL(int,                17777777777);
     61   CHECK_OCTAL(unsigned int,       37777777777U);
     62   CHECK_OCTAL(long,               17777777777L);
     63   CHECK_OCTAL(unsigned long,      37777777777UL);
     64   CHECK_OCTAL(long long,          777777777777777777777LL);
     65   CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
     66 
     67 #undef CHECK_OCTAL
     68 }
     69 
     70 TEST(RE2, DecimalTests) {
     71   VLOG(1) << "decimal tests";
     72 
     73 #define CHECK_DECIMAL(type, value) \
     74   do { \
     75     type v; \
     76     CHECK(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
     77     CHECK_EQ(v, value); \
     78     CHECK(RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
     79     CHECK_EQ(v, value); \
     80   } while(0)
     81 
     82   CHECK_DECIMAL(short,              -1);
     83   CHECK_DECIMAL(unsigned short,     9999);
     84   CHECK_DECIMAL(int,                -1000);
     85   CHECK_DECIMAL(unsigned int,       12345U);
     86   CHECK_DECIMAL(long,               -10000000L);
     87   CHECK_DECIMAL(unsigned long,      3083324652U);
     88   CHECK_DECIMAL(long long,          -100000000000000LL);
     89   CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
     90 
     91 #undef CHECK_DECIMAL
     92 }
     93 
     94 TEST(RE2, Replace) {
     95   VLOG(1) << "TestReplace";
     96 
     97   struct ReplaceTest {
     98     const char *regexp;
     99     const char *rewrite;
    100     const char *original;
    101     const char *single;
    102     const char *global;
    103     int        greplace_count;
    104   };
    105   static const ReplaceTest tests[] = {
    106     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
    107       "\\2\\1ay",
    108       "the quick brown fox jumps over the lazy dogs.",
    109       "ethay quick brown fox jumps over the lazy dogs.",
    110       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
    111       9 },
    112     { "\\w+",
    113       "\\0-NOSPAM",
    114       "abcd.efghi (at) google.com",
    115       "abcd-NOSPAM.efghi (at) google.com",
    116       "abcd-NOSPAM.efghi-NOSPAM (at) google-NOSPAM.com-NOSPAM",
    117       4 },
    118     { "^",
    119       "(START)",
    120       "foo",
    121       "(START)foo",
    122       "(START)foo",
    123       1 },
    124     { "^",
    125       "(START)",
    126       "",
    127       "(START)",
    128       "(START)",
    129       1 },
    130     { "$",
    131       "(END)",
    132       "",
    133       "(END)",
    134       "(END)",
    135       1 },
    136     { "b",
    137       "bb",
    138       "ababababab",
    139       "abbabababab",
    140       "abbabbabbabbabb",
    141       5 },
    142     { "b",
    143       "bb",
    144       "bbbbbb",
    145       "bbbbbbb",
    146       "bbbbbbbbbbbb",
    147       6 },
    148     { "b+",
    149       "bb",
    150       "bbbbbb",
    151       "bb",
    152       "bb",
    153       1 },
    154     { "b*",
    155       "bb",
    156       "bbbbbb",
    157       "bb",
    158       "bb",
    159       1 },
    160     { "b*",
    161       "bb",
    162       "aaaaa",
    163       "bbaaaaa",
    164       "bbabbabbabbabbabb",
    165       6 },
    166     // Check newline handling
    167     { "a.*a",
    168       "(\\0)",
    169       "aba\naba",
    170       "(aba)\naba",
    171       "(aba)\n(aba)",
    172       2 },
    173     { "", NULL, NULL, NULL, NULL, 0 }
    174   };
    175 
    176   for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
    177     VLOG(1) << StringPrintf("\"%s\" =~ s/%s/%s/g", t->original, t->regexp, t->rewrite);
    178     string one(t->original);
    179     CHECK(RE2::Replace(&one, t->regexp, t->rewrite));
    180     CHECK_EQ(one, t->single);
    181     string all(t->original);
    182     CHECK_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
    183       << "Got: " << all;
    184     CHECK_EQ(all, t->global);
    185   }
    186 }
    187 
    188 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
    189                               bool expect_ok) {
    190   string error;
    191   RE2 exp(regexp);
    192   bool actual_ok = exp.CheckRewriteString(rewrite, &error);
    193   EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
    194 }
    195 
    196 TEST(CheckRewriteString, all) {
    197   TestCheckRewriteString("abc", "foo", true);
    198   TestCheckRewriteString("abc", "foo\\", false);
    199   TestCheckRewriteString("abc", "foo\\0bar", true);
    200 
    201   TestCheckRewriteString("a(b)c", "foo", true);
    202   TestCheckRewriteString("a(b)c", "foo\\0bar", true);
    203   TestCheckRewriteString("a(b)c", "foo\\1bar", true);
    204   TestCheckRewriteString("a(b)c", "foo\\2bar", false);
    205   TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
    206 
    207   TestCheckRewriteString("a(b)(c)", "foo\\12", true);
    208   TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
    209   TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
    210 }
    211 
    212 TEST(RE2, Extract) {
    213   VLOG(1) << "TestExtract";
    214 
    215   string s;
    216 
    217   CHECK(RE2::Extract("boris (at) kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
    218   CHECK_EQ(s, "kremvax!boris");
    219 
    220   CHECK(RE2::Extract("foo", ".*", "'\\0'", &s));
    221   CHECK_EQ(s, "'foo'");
    222   // check that false match doesn't overwrite
    223   CHECK(!RE2::Extract("baz", "bar", "'\\0'", &s));
    224   CHECK_EQ(s, "'foo'");
    225 }
    226 
    227 TEST(RE2, Consume) {
    228   VLOG(1) << "TestConsume";
    229 
    230   RE2 r("\\s*(\\w+)");    // matches a word, possibly proceeded by whitespace
    231   string word;
    232 
    233   string s("   aaa b!@#$@#$cccc");
    234   StringPiece input(s);
    235 
    236   CHECK(RE2::Consume(&input, r, &word));
    237   CHECK_EQ(word, "aaa") << " input: " << input;
    238   CHECK(RE2::Consume(&input, r, &word));
    239   CHECK_EQ(word, "b") << " input: " << input;
    240   CHECK(! RE2::Consume(&input, r, &word)) << " input: " << input;
    241 }
    242 
    243 TEST(RE2, ConsumeN) {
    244   const string s(" one two three 4");
    245   StringPiece input(s);
    246 
    247   RE2::Arg argv[2];
    248   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    249 
    250   // 0 arg
    251   EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0));  // Skips "one".
    252 
    253   // 1 arg
    254   string word;
    255   argv[0] = &word;
    256   EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
    257   EXPECT_EQ("two", word);
    258 
    259   // Multi-args
    260   int n;
    261   argv[1] = &n;
    262   EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
    263   EXPECT_EQ("three", word);
    264   EXPECT_EQ(4, n);
    265 }
    266 
    267 TEST(RE2, FindAndConsume) {
    268   VLOG(1) << "TestFindAndConsume";
    269 
    270   RE2 r("(\\w+)");      // matches a word
    271   string word;
    272 
    273   string s("   aaa b!@#$@#$cccc");
    274   StringPiece input(s);
    275 
    276   CHECK(RE2::FindAndConsume(&input, r, &word));
    277   CHECK_EQ(word, "aaa");
    278   CHECK(RE2::FindAndConsume(&input, r, &word));
    279   CHECK_EQ(word, "b");
    280   CHECK(RE2::FindAndConsume(&input, r, &word));
    281   CHECK_EQ(word, "cccc");
    282   CHECK(! RE2::FindAndConsume(&input, r, &word));
    283 
    284   // Check that FindAndConsume works without any submatches.
    285   // Earlier version used uninitialized data for
    286   // length to consume.
    287   input = "aaa";
    288   CHECK(RE2::FindAndConsume(&input, "aaa"));
    289   CHECK_EQ(input, "");
    290 }
    291 
    292 TEST(RE2, FindAndConsumeN) {
    293   const string s(" one two three 4");
    294   StringPiece input(s);
    295 
    296   RE2::Arg argv[2];
    297   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    298 
    299   // 0 arg
    300   EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0));  // Skips "one".
    301 
    302   // 1 arg
    303   string word;
    304   argv[0] = &word;
    305   EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
    306   EXPECT_EQ("two", word);
    307 
    308   // Multi-args
    309   int n;
    310   argv[1] = &n;
    311   EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
    312   EXPECT_EQ("three", word);
    313   EXPECT_EQ(4, n);
    314 }
    315 
    316 TEST(RE2, MatchNumberPeculiarity) {
    317   VLOG(1) << "TestMatchNumberPeculiarity";
    318 
    319   RE2 r("(foo)|(bar)|(baz)");
    320   string word1;
    321   string word2;
    322   string word3;
    323 
    324   CHECK(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
    325   CHECK_EQ(word1, "foo");
    326   CHECK_EQ(word2, "");
    327   CHECK_EQ(word3, "");
    328   CHECK(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
    329   CHECK_EQ(word1, "");
    330   CHECK_EQ(word2, "bar");
    331   CHECK_EQ(word3, "");
    332   CHECK(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
    333   CHECK_EQ(word1, "");
    334   CHECK_EQ(word2, "");
    335   CHECK_EQ(word3, "baz");
    336   CHECK(!RE2::PartialMatch("f", r, &word1, &word2, &word3));
    337 
    338   string a;
    339   CHECK(RE2::FullMatch("hello", "(foo)|hello", &a));
    340   CHECK_EQ(a, "");
    341 }
    342 
    343 TEST(RE2, Match) {
    344   RE2 re("((\\w+):([0-9]+))");   // extracts host and port
    345   StringPiece group[4];
    346 
    347   // No match.
    348   StringPiece s = "zyzzyva";
    349   CHECK(!re.Match(s, 0, s.size(), RE2::UNANCHORED,
    350                   group, arraysize(group)));
    351 
    352   // Matches and extracts.
    353   s = "a chrisr:9000 here";
    354   CHECK(re.Match(s, 0, s.size(), RE2::UNANCHORED,
    355                  group, arraysize(group)));
    356   CHECK_EQ(group[0], "chrisr:9000");
    357   CHECK_EQ(group[1], "chrisr:9000");
    358   CHECK_EQ(group[2], "chrisr");
    359   CHECK_EQ(group[3], "9000");
    360 
    361   string all, host;
    362   int port;
    363   CHECK(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
    364   CHECK_EQ(all, "chrisr:9000");
    365   CHECK_EQ(host, "chrisr");
    366   CHECK_EQ(port, 9000);
    367 }
    368 
    369 static void TestRecursion(int size, const char *pattern) {
    370   // Fill up a string repeating the pattern given
    371   string domain;
    372   domain.resize(size);
    373   int patlen = strlen(pattern);
    374   for (int i = 0; i < size; ++i) {
    375     domain[i] = pattern[i % patlen];
    376   }
    377   // Just make sure it doesn't crash due to too much recursion.
    378   RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
    379   RE2::FullMatch(domain, re);
    380 }
    381 
    382 // A meta-quoted string, interpreted as a pattern, should always match
    383 // the original unquoted string.
    384 static void TestQuoteMeta(string unquoted,
    385                           const RE2::Options& options = RE2::DefaultOptions) {
    386   string quoted = RE2::QuoteMeta(unquoted);
    387   RE2 re(quoted, options);
    388   EXPECT_TRUE_M(RE2::FullMatch(unquoted, re),
    389                 "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
    390 }
    391 
    392 // A meta-quoted string, interpreted as a pattern, should always match
    393 // the original unquoted string.
    394 static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
    395                                   const RE2::Options& options = RE2::DefaultOptions) {
    396   string quoted = RE2::QuoteMeta(unquoted);
    397   RE2 re(quoted, options);
    398   EXPECT_FALSE_M(RE2::FullMatch(should_not_match, re),
    399                  "Unquoted='" + unquoted + "', quoted='" + quoted + "'.");
    400 }
    401 
    402 // Tests that quoted meta characters match their original strings,
    403 // and that a few things that shouldn't match indeed do not.
    404 TEST(QuoteMeta, Simple) {
    405   TestQuoteMeta("foo");
    406   TestQuoteMeta("foo.bar");
    407   TestQuoteMeta("foo\\.bar");
    408   TestQuoteMeta("[1-9]");
    409   TestQuoteMeta("1.5-2.0?");
    410   TestQuoteMeta("\\d");
    411   TestQuoteMeta("Who doesn't like ice cream?");
    412   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
    413   TestQuoteMeta("((?!)xxx).*yyy");
    414   TestQuoteMeta("([");
    415 }
    416 TEST(QuoteMeta, SimpleNegative) {
    417   NegativeTestQuoteMeta("foo", "bar");
    418   NegativeTestQuoteMeta("...", "bar");
    419   NegativeTestQuoteMeta("\\.", ".");
    420   NegativeTestQuoteMeta("\\.", "..");
    421   NegativeTestQuoteMeta("(a)", "a");
    422   NegativeTestQuoteMeta("(a|b)", "a");
    423   NegativeTestQuoteMeta("(a|b)", "(a)");
    424   NegativeTestQuoteMeta("(a|b)", "a|b");
    425   NegativeTestQuoteMeta("[0-9]", "0");
    426   NegativeTestQuoteMeta("[0-9]", "0-9");
    427   NegativeTestQuoteMeta("[0-9]", "[9]");
    428   NegativeTestQuoteMeta("((?!)xxx)", "xxx");
    429 }
    430 
    431 TEST(QuoteMeta, Latin1) {
    432   TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
    433 }
    434 
    435 TEST(QuoteMeta, UTF8) {
    436   TestQuoteMeta("Plcido Domingo");
    437   TestQuoteMeta("xyz");  // No fancy utf8.
    438   TestQuoteMeta("\xc2\xb0");  // 2-byte utf8 -- a degree symbol.
    439   TestQuoteMeta("27\xc2\xb0 degrees");  // As a middle character.
    440   TestQuoteMeta("\xe2\x80\xb3");  // 3-byte utf8 -- a double prime.
    441   TestQuoteMeta("\xf0\x9d\x85\x9f");  // 4-byte utf8 -- a music note.
    442   TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, this should
    443                                 // still work.
    444   NegativeTestQuoteMeta("27\xc2\xb0",
    445                         "27\\\xc2\\\xb0");  // 2-byte utf8 -- a degree symbol.
    446 }
    447 
    448 TEST(QuoteMeta, HasNull) {
    449   string has_null;
    450 
    451   // string with one null character
    452   has_null += '\0';
    453   TestQuoteMeta(has_null);
    454   NegativeTestQuoteMeta(has_null, "");
    455 
    456   // Don't want null-followed-by-'1' to be interpreted as '\01'.
    457   has_null += '1';
    458   TestQuoteMeta(has_null);
    459   NegativeTestQuoteMeta(has_null, "\1");
    460 }
    461 
    462 TEST(ProgramSize, BigProgram) {
    463   RE2 re_simple("simple regexp");
    464   RE2 re_medium("medium.*regexp");
    465   RE2 re_complex("hard.{1,128}regexp");
    466 
    467   CHECK_GT(re_simple.ProgramSize(), 0);
    468   CHECK_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
    469   CHECK_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
    470 }
    471 
    472 // Issue 956519: handling empty character sets was
    473 // causing NULL dereference.  This tests a few empty character sets.
    474 // (The way to get an empty character set is to negate a full one.)
    475 TEST(EmptyCharset, Fuzz) {
    476   static const char *empties[] = {
    477     "[^\\S\\s]",
    478     "[^\\S[:space:]]",
    479     "[^\\D\\d]",
    480     "[^\\D[:digit:]]"
    481   };
    482   for (int i = 0; i < arraysize(empties); i++)
    483     CHECK(!RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
    484 }
    485 
    486 // Test that named groups work correctly.
    487 TEST(Capture, NamedGroups) {
    488   {
    489     RE2 re("(hello world)");
    490     CHECK_EQ(re.NumberOfCapturingGroups(), 1);
    491     const map<string, int>& m = re.NamedCapturingGroups();
    492     CHECK_EQ(m.size(), 0);
    493   }
    494 
    495   {
    496     RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
    497     CHECK_EQ(re.NumberOfCapturingGroups(), 6);
    498     const map<string, int>& m = re.NamedCapturingGroups();
    499     CHECK_EQ(m.size(), 4);
    500     CHECK_EQ(m.find("A")->second, 1);
    501     CHECK_EQ(m.find("B")->second, 2);
    502     CHECK_EQ(m.find("C")->second, 3);
    503     CHECK_EQ(m.find("D")->second, 6);  // $4 and $5 are anonymous
    504   }
    505 }
    506 
    507 TEST(RE2, FullMatchWithNoArgs) {
    508   CHECK(RE2::FullMatch("h", "h"));
    509   CHECK(RE2::FullMatch("hello", "hello"));
    510   CHECK(RE2::FullMatch("hello", "h.*o"));
    511   CHECK(!RE2::FullMatch("othello", "h.*o"));       // Must be anchored at front
    512   CHECK(!RE2::FullMatch("hello!", "h.*o"));        // Must be anchored at end
    513 }
    514 
    515 TEST(RE2, PartialMatch) {
    516   CHECK(RE2::PartialMatch("x", "x"));
    517   CHECK(RE2::PartialMatch("hello", "h.*o"));
    518   CHECK(RE2::PartialMatch("othello", "h.*o"));
    519   CHECK(RE2::PartialMatch("hello!", "h.*o"));
    520   CHECK(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
    521 }
    522 
    523 TEST(RE2, PartialMatchN) {
    524   RE2::Arg argv[2];
    525   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    526 
    527   // 0 arg
    528   EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
    529   EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
    530 
    531   // 1 arg
    532   int i;
    533   argv[0] = &i;
    534   EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
    535   EXPECT_EQ(1001, i);
    536   EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
    537 
    538   // Multi-arg
    539   string s;
    540   argv[1] = &s;
    541   EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
    542   EXPECT_EQ(42, i);
    543   EXPECT_EQ("life", s);
    544   EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
    545 }
    546 
    547 TEST(RE2, FullMatchZeroArg) {
    548   // Zero-arg
    549   CHECK(RE2::FullMatch("1001", "\\d+"));
    550 }
    551 
    552 TEST(RE2, FullMatchOneArg) {
    553   int i;
    554 
    555   // Single-arg
    556   CHECK(RE2::FullMatch("1001", "(\\d+)",   &i));
    557   CHECK_EQ(i, 1001);
    558   CHECK(RE2::FullMatch("-123", "(-?\\d+)", &i));
    559   CHECK_EQ(i, -123);
    560   CHECK(!RE2::FullMatch("10", "()\\d+", &i));
    561   CHECK(!RE2::FullMatch("1234567890123456789012345678901234567890",
    562                        "(\\d+)", &i));
    563 }
    564 
    565 TEST(RE2, FullMatchIntegerArg) {
    566   int i;
    567 
    568   // Digits surrounding integer-arg
    569   CHECK(RE2::FullMatch("1234", "1(\\d*)4", &i));
    570   CHECK_EQ(i, 23);
    571   CHECK(RE2::FullMatch("1234", "(\\d)\\d+", &i));
    572   CHECK_EQ(i, 1);
    573   CHECK(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
    574   CHECK_EQ(i, -1);
    575   CHECK(RE2::PartialMatch("1234", "(\\d)", &i));
    576   CHECK_EQ(i, 1);
    577   CHECK(RE2::PartialMatch("-1234", "(-\\d)", &i));
    578   CHECK_EQ(i, -1);
    579 }
    580 
    581 TEST(RE2, FullMatchStringArg) {
    582   string s;
    583   // String-arg
    584   CHECK(RE2::FullMatch("hello", "h(.*)o", &s));
    585   CHECK_EQ(s, string("ell"));
    586 }
    587 
    588 TEST(RE2, FullMatchStringPieceArg) {
    589   int i;
    590   // StringPiece-arg
    591   StringPiece sp;
    592   CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
    593   CHECK_EQ(sp.size(), 4);
    594   CHECK(memcmp(sp.data(), "ruby", 4) == 0);
    595   CHECK_EQ(i, 1234);
    596 }
    597 
    598 TEST(RE2, FullMatchMultiArg) {
    599   int i;
    600   string s;
    601   // Multi-arg
    602   CHECK(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
    603   CHECK_EQ(s, string("ruby"));
    604   CHECK_EQ(i, 1234);
    605 }
    606 
    607 TEST(RE2, FullMatchN) {
    608   RE2::Arg argv[2];
    609   const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
    610 
    611   // 0 arg
    612   EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
    613   EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
    614 
    615   // 1 arg
    616   int i;
    617   argv[0] = &i;
    618   EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
    619   EXPECT_EQ(1001, i);
    620   EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
    621 
    622   // Multi-arg
    623   string s;
    624   argv[1] = &s;
    625   EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
    626   EXPECT_EQ(42, i);
    627   EXPECT_EQ("life", s);
    628   EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
    629 }
    630 
    631 TEST(RE2, FullMatchIgnoredArg) {
    632   int i;
    633   string s;
    634   // Ignored arg
    635   CHECK(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
    636   CHECK_EQ(s, string("ruby"));
    637   CHECK_EQ(i, 1234);
    638 }
    639 
    640 TEST(RE2, FullMatchTypedNullArg) {
    641   string s;
    642 
    643   // Ignore non-void* NULL arg
    644   CHECK(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
    645   CHECK(RE2::FullMatch("hello", "h(.*)o", (string*)NULL));
    646   CHECK(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
    647   CHECK(RE2::FullMatch("1234", "(.*)", (int*)NULL));
    648   CHECK(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
    649   CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
    650   CHECK(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
    651 
    652   // Fail on non-void* NULL arg if the match doesn't parse for the given type.
    653   CHECK(!RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
    654   CHECK(!RE2::FullMatch("hello", "(.*)", (int*)NULL));
    655   CHECK(!RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
    656   CHECK(!RE2::FullMatch("hello", "(.*)", (double*)NULL));
    657   CHECK(!RE2::FullMatch("hello", "(.*)", (float*)NULL));
    658 }
    659 
    660 // Check that numeric parsing code does not read past the end of
    661 // the number being parsed.
    662 TEST(RE2, NULTerminated) {
    663   char *v;
    664   int x;
    665   long pagesize = sysconf(_SC_PAGE_SIZE);
    666 
    667 #ifndef MAP_ANONYMOUS
    668 #define MAP_ANONYMOUS MAP_ANON
    669 #endif
    670   v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
    671                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
    672   CHECK(v != reinterpret_cast<char*>(-1));
    673   LOG(INFO) << "Memory at " << (void*)v;
    674   CHECK_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
    675   v[pagesize - 1] = '1';
    676 
    677   x = 0;
    678   CHECK(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
    679   CHECK_EQ(x, 1);
    680 }
    681 
    682 TEST(RE2, FullMatchTypeTests) {
    683   // Type tests
    684   string zeros(100, '0');
    685   {
    686     char c;
    687     CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
    688     CHECK_EQ(c, 'H');
    689   }
    690   {
    691     unsigned char c;
    692     CHECK(RE2::FullMatch("Hello", "(H)ello", &c));
    693     CHECK_EQ(c, static_cast<unsigned char>('H'));
    694   }
    695   {
    696     int16 v;
    697     CHECK(RE2::FullMatch("100",     "(-?\\d+)", &v));    CHECK_EQ(v, 100);
    698     CHECK(RE2::FullMatch("-100",    "(-?\\d+)", &v));    CHECK_EQ(v, -100);
    699     CHECK(RE2::FullMatch("32767",   "(-?\\d+)", &v));    CHECK_EQ(v, 32767);
    700     CHECK(RE2::FullMatch("-32768",  "(-?\\d+)", &v));    CHECK_EQ(v, -32768);
    701     CHECK(!RE2::FullMatch("-32769", "(-?\\d+)", &v));
    702     CHECK(!RE2::FullMatch("32768",  "(-?\\d+)", &v));
    703   }
    704   {
    705     uint16 v;
    706     CHECK(RE2::FullMatch("100",     "(\\d+)", &v));    CHECK_EQ(v, 100);
    707     CHECK(RE2::FullMatch("32767",   "(\\d+)", &v));    CHECK_EQ(v, 32767);
    708     CHECK(RE2::FullMatch("65535",   "(\\d+)", &v));    CHECK_EQ(v, 65535);
    709     CHECK(!RE2::FullMatch("65536",  "(\\d+)", &v));
    710   }
    711   {
    712     int32 v;
    713     static const int32 max = 0x7fffffff;
    714     static const int32 min = -max - 1;
    715     CHECK(RE2::FullMatch("100",          "(-?\\d+)", &v)); CHECK_EQ(v, 100);
    716     CHECK(RE2::FullMatch("-100",         "(-?\\d+)", &v)); CHECK_EQ(v, -100);
    717     CHECK(RE2::FullMatch("2147483647",   "(-?\\d+)", &v)); CHECK_EQ(v, max);
    718     CHECK(RE2::FullMatch("-2147483648",  "(-?\\d+)", &v)); CHECK_EQ(v, min);
    719     CHECK(!RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
    720     CHECK(!RE2::FullMatch("2147483648",  "(-?\\d+)", &v));
    721 
    722     CHECK(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
    723     CHECK_EQ(v, max);
    724     CHECK(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
    725     CHECK_EQ(v, min);
    726 
    727     CHECK(!RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
    728     CHECK(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
    729     CHECK_EQ(v, max);
    730     CHECK(!RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
    731   }
    732   {
    733     uint32 v;
    734     static const uint32 max = 0xfffffffful;
    735     CHECK(RE2::FullMatch("100",         "(\\d+)", &v)); CHECK_EQ(v, 100);
    736     CHECK(RE2::FullMatch("4294967295",  "(\\d+)", &v)); CHECK_EQ(v, max);
    737     CHECK(!RE2::FullMatch("4294967296", "(\\d+)", &v));
    738     CHECK(!RE2::FullMatch("-1",         "(\\d+)", &v));
    739 
    740     CHECK(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); CHECK_EQ(v, max);
    741   }
    742   {
    743     int64 v;
    744     static const int64 max = 0x7fffffffffffffffull;
    745     static const int64 min = -max - 1;
    746     char buf[32];
    747 
    748     CHECK(RE2::FullMatch("100",  "(-?\\d+)", &v)); CHECK_EQ(v, 100);
    749     CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v)); CHECK_EQ(v, -100);
    750 
    751     snprintf(buf, sizeof(buf), "%lld", (long long int)max);
    752     CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, max);
    753 
    754     snprintf(buf, sizeof(buf), "%lld", (long long int)min);
    755     CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, min);
    756 
    757     snprintf(buf, sizeof(buf), "%lld", (long long int)max);
    758     assert(buf[strlen(buf)-1] != '9');
    759     buf[strlen(buf)-1]++;
    760     CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
    761 
    762     snprintf(buf, sizeof(buf), "%lld", (long long int)min);
    763     assert(buf[strlen(buf)-1] != '9');
    764     buf[strlen(buf)-1]++;
    765     CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
    766   }
    767   {
    768     uint64 v;
    769     int64 v2;
    770     static const uint64 max = 0xffffffffffffffffull;
    771     char buf[32];
    772 
    773     CHECK(RE2::FullMatch("100",  "(-?\\d+)", &v));  CHECK_EQ(v, 100);
    774     CHECK(RE2::FullMatch("-100", "(-?\\d+)", &v2)); CHECK_EQ(v2, -100);
    775 
    776     snprintf(buf, sizeof(buf), "%llu", (long long unsigned)max);
    777     CHECK(RE2::FullMatch(buf,    "(-?\\d+)", &v)); CHECK_EQ(v, max);
    778 
    779     assert(buf[strlen(buf)-1] != '9');
    780     buf[strlen(buf)-1]++;
    781     CHECK(!RE2::FullMatch(buf,   "(-?\\d+)", &v));
    782   }
    783 }
    784 
    785 TEST(RE2, FloatingPointFullMatchTypes) {
    786   string zeros(100, '0');
    787   {
    788     float v;
    789     CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
    790     CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
    791     CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, float(1e23));
    792 
    793     CHECK(RE2::FullMatch(zeros + "1e23",  "(.*)", &v));
    794     CHECK_EQ(v, float(1e23));
    795 
    796     // 6700000000081920.1 is an edge case.
    797     // 6700000000081920 is exactly halfway between
    798     // two float32s, so the .1 should make it round up.
    799     // However, the .1 is outside the precision possible with
    800     // a float64: the nearest float64 is 6700000000081920.
    801     // So if the code uses strtod and then converts to float32,
    802     // round-to-even will make it round down instead of up.
    803     // To pass the test, the parser must call strtof directly.
    804     // This test case is carefully chosen to use only a 17-digit
    805     // number, since C does not guarantee to get the correctly
    806     // rounded answer for strtod and strtof unless the input is
    807     // short.
    808     CHECK(RE2::FullMatch("0.1", "(.*)", &v));
    809     CHECK_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
    810     CHECK(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
    811     CHECK_EQ(v, 6700000000081920.1f)
    812       << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
    813   }
    814   {
    815     double v;
    816     CHECK(RE2::FullMatch("100",   "(.*)", &v));  CHECK_EQ(v, 100);
    817     CHECK(RE2::FullMatch("-100.", "(.*)", &v));  CHECK_EQ(v, -100);
    818     CHECK(RE2::FullMatch("1e23",  "(.*)", &v));  CHECK_EQ(v, 1e23);
    819     CHECK(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
    820     CHECK_EQ(v, double(1e23));
    821 
    822     CHECK(RE2::FullMatch("0.1", "(.*)", &v));
    823     CHECK_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
    824     CHECK(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
    825     CHECK_EQ(v, 1.0000000596046448)
    826       << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
    827   }
    828 }
    829 
    830 TEST(RE2, FullMatchAnchored) {
    831   int i;
    832   // Check that matching is fully anchored
    833   CHECK(!RE2::FullMatch("x1001", "(\\d+)",  &i));
    834   CHECK(!RE2::FullMatch("1001x", "(\\d+)",  &i));
    835   CHECK(RE2::FullMatch("x1001",  "x(\\d+)", &i)); CHECK_EQ(i, 1001);
    836   CHECK(RE2::FullMatch("1001x",  "(\\d+)x", &i)); CHECK_EQ(i, 1001);
    837 }
    838 
    839 TEST(RE2, FullMatchBraces) {
    840   // Braces
    841   CHECK(RE2::FullMatch("0abcd",  "[0-9a-f+.-]{5,}"));
    842   CHECK(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
    843   CHECK(!RE2::FullMatch("0abc",  "[0-9a-f+.-]{5,}"));
    844 }
    845 
    846 TEST(RE2, Complicated) {
    847   // Complicated RE2
    848   CHECK(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
    849   CHECK(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
    850   CHECK(RE2::FullMatch("X",   "foo|bar|[A-Z]"));
    851   CHECK(!RE2::FullMatch("XY", "foo|bar|[A-Z]"));
    852 }
    853 
    854 TEST(RE2, FullMatchEnd) {
    855   // Check full-match handling (needs '$' tacked on internally)
    856   CHECK(RE2::FullMatch("fo", "fo|foo"));
    857   CHECK(RE2::FullMatch("foo", "fo|foo"));
    858   CHECK(RE2::FullMatch("fo", "fo|foo$"));
    859   CHECK(RE2::FullMatch("foo", "fo|foo$"));
    860   CHECK(RE2::FullMatch("foo", "foo$"));
    861   CHECK(!RE2::FullMatch("foo$bar", "foo\\$"));
    862   CHECK(!RE2::FullMatch("fox", "fo|bar"));
    863 
    864   // Uncomment the following if we change the handling of '$' to
    865   // prevent it from matching a trailing newline
    866   if (false) {
    867     // Check that we don't get bitten by pcre's special handling of a
    868     // '\n' at the end of the string matching '$'
    869     CHECK(!RE2::PartialMatch("foo\n", "foo$"));
    870   }
    871 }
    872 
    873 TEST(RE2, FullMatchArgCount) {
    874   // Number of args
    875   int a[16];
    876   CHECK(RE2::FullMatch("", ""));
    877 
    878   memset(a, 0, sizeof(0));
    879   CHECK(RE2::FullMatch("1",
    880                       "(\\d){1}",
    881                       &a[0]));
    882   CHECK_EQ(a[0], 1);
    883 
    884   memset(a, 0, sizeof(0));
    885   CHECK(RE2::FullMatch("12",
    886                       "(\\d)(\\d)",
    887                       &a[0],  &a[1]));
    888   CHECK_EQ(a[0], 1);
    889   CHECK_EQ(a[1], 2);
    890 
    891   memset(a, 0, sizeof(0));
    892   CHECK(RE2::FullMatch("123",
    893                       "(\\d)(\\d)(\\d)",
    894                       &a[0],  &a[1],  &a[2]));
    895   CHECK_EQ(a[0], 1);
    896   CHECK_EQ(a[1], 2);
    897   CHECK_EQ(a[2], 3);
    898 
    899   memset(a, 0, sizeof(0));
    900   CHECK(RE2::FullMatch("1234",
    901                       "(\\d)(\\d)(\\d)(\\d)",
    902                       &a[0],  &a[1],  &a[2],  &a[3]));
    903   CHECK_EQ(a[0], 1);
    904   CHECK_EQ(a[1], 2);
    905   CHECK_EQ(a[2], 3);
    906   CHECK_EQ(a[3], 4);
    907 
    908   memset(a, 0, sizeof(0));
    909   CHECK(RE2::FullMatch("12345",
    910                       "(\\d)(\\d)(\\d)(\\d)(\\d)",
    911                       &a[0],  &a[1],  &a[2],  &a[3],
    912                       &a[4]));
    913   CHECK_EQ(a[0], 1);
    914   CHECK_EQ(a[1], 2);
    915   CHECK_EQ(a[2], 3);
    916   CHECK_EQ(a[3], 4);
    917   CHECK_EQ(a[4], 5);
    918 
    919   memset(a, 0, sizeof(0));
    920   CHECK(RE2::FullMatch("123456",
    921                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
    922                       &a[0],  &a[1],  &a[2],  &a[3],
    923                       &a[4],  &a[5]));
    924   CHECK_EQ(a[0], 1);
    925   CHECK_EQ(a[1], 2);
    926   CHECK_EQ(a[2], 3);
    927   CHECK_EQ(a[3], 4);
    928   CHECK_EQ(a[4], 5);
    929   CHECK_EQ(a[5], 6);
    930 
    931   memset(a, 0, sizeof(0));
    932   CHECK(RE2::FullMatch("1234567",
    933                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
    934                       &a[0],  &a[1],  &a[2],  &a[3],
    935                       &a[4],  &a[5],  &a[6]));
    936   CHECK_EQ(a[0], 1);
    937   CHECK_EQ(a[1], 2);
    938   CHECK_EQ(a[2], 3);
    939   CHECK_EQ(a[3], 4);
    940   CHECK_EQ(a[4], 5);
    941   CHECK_EQ(a[5], 6);
    942   CHECK_EQ(a[6], 7);
    943 
    944   memset(a, 0, sizeof(0));
    945   CHECK(RE2::FullMatch("1234567890123456",
    946                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
    947                       "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
    948                       &a[0],  &a[1],  &a[2],  &a[3],
    949                       &a[4],  &a[5],  &a[6],  &a[7],
    950                       &a[8],  &a[9],  &a[10], &a[11],
    951                       &a[12], &a[13], &a[14], &a[15]));
    952   CHECK_EQ(a[0], 1);
    953   CHECK_EQ(a[1], 2);
    954   CHECK_EQ(a[2], 3);
    955   CHECK_EQ(a[3], 4);
    956   CHECK_EQ(a[4], 5);
    957   CHECK_EQ(a[5], 6);
    958   CHECK_EQ(a[6], 7);
    959   CHECK_EQ(a[7], 8);
    960   CHECK_EQ(a[8], 9);
    961   CHECK_EQ(a[9], 0);
    962   CHECK_EQ(a[10], 1);
    963   CHECK_EQ(a[11], 2);
    964   CHECK_EQ(a[12], 3);
    965   CHECK_EQ(a[13], 4);
    966   CHECK_EQ(a[14], 5);
    967   CHECK_EQ(a[15], 6);
    968 }
    969 
    970 TEST(RE2, Accessors) {
    971   // Check the pattern() accessor
    972   {
    973     const string kPattern = "http://([^/]+)/.*";
    974     const RE2 re(kPattern);
    975     CHECK_EQ(kPattern, re.pattern());
    976   }
    977 
    978   // Check RE2 error field.
    979   {
    980     RE2 re("foo");
    981     CHECK(re.error().empty());  // Must have no error
    982     CHECK(re.ok());
    983     CHECK(re.error_code() == RE2::NoError);
    984   }
    985 }
    986 
    987 TEST(RE2, UTF8) {
    988   // Check UTF-8 handling
    989   // Three Japanese characters (nihongo)
    990   const char utf8_string[] = {
    991        0xe6, 0x97, 0xa5, // 65e5
    992        0xe6, 0x9c, 0xac, // 627c
    993        0xe8, 0xaa, 0x9e, // 8a9e
    994        0
    995   };
    996   const char utf8_pattern[] = {
    997        '.',
    998        0xe6, 0x9c, 0xac, // 627c
    999        '.',
   1000        0
   1001   };
   1002 
   1003   // Both should match in either mode, bytes or UTF-8
   1004   RE2 re_test1(".........", RE2::Latin1);
   1005   CHECK(RE2::FullMatch(utf8_string, re_test1));
   1006   RE2 re_test2("...");
   1007   CHECK(RE2::FullMatch(utf8_string, re_test2));
   1008 
   1009   // Check that '.' matches one byte or UTF-8 character
   1010   // according to the mode.
   1011   string s;
   1012   RE2 re_test3("(.)", RE2::Latin1);
   1013   CHECK(RE2::PartialMatch(utf8_string, re_test3, &s));
   1014   CHECK_EQ(s, string("\xe6"));
   1015   RE2 re_test4("(.)");
   1016   CHECK(RE2::PartialMatch(utf8_string, re_test4, &s));
   1017   CHECK_EQ(s, string("\xe6\x97\xa5"));
   1018 
   1019   // Check that string matches itself in either mode
   1020   RE2 re_test5(utf8_string, RE2::Latin1);
   1021   CHECK(RE2::FullMatch(utf8_string, re_test5));
   1022   RE2 re_test6(utf8_string);
   1023   CHECK(RE2::FullMatch(utf8_string, re_test6));
   1024 
   1025   // Check that pattern matches string only in UTF8 mode
   1026   RE2 re_test7(utf8_pattern, RE2::Latin1);
   1027   CHECK(!RE2::FullMatch(utf8_string, re_test7));
   1028   RE2 re_test8(utf8_pattern);
   1029   CHECK(RE2::FullMatch(utf8_string, re_test8));
   1030 }
   1031 
   1032 TEST(RE2, UngreedyUTF8) {
   1033   // Check that ungreedy, UTF8 regular expressions don't match when they
   1034   // oughtn't -- see bug 82246.
   1035   {
   1036     // This code always worked.
   1037     const char* pattern = "\\w+X";
   1038     const string target = "a aX";
   1039     RE2 match_sentence(pattern, RE2::Latin1);
   1040     RE2 match_sentence_re(pattern);
   1041 
   1042     CHECK(!RE2::FullMatch(target, match_sentence));
   1043     CHECK(!RE2::FullMatch(target, match_sentence_re));
   1044   }
   1045   {
   1046     const char* pattern = "(?U)\\w+X";
   1047     const string target = "a aX";
   1048     RE2 match_sentence(pattern, RE2::Latin1);
   1049     CHECK_EQ(match_sentence.error(), "");
   1050     RE2 match_sentence_re(pattern);
   1051 
   1052     CHECK(!RE2::FullMatch(target, match_sentence));
   1053     CHECK(!RE2::FullMatch(target, match_sentence_re));
   1054   }
   1055 }
   1056 
   1057 TEST(RE2, Rejects) {
   1058   { RE2 re("a\\1", RE2::Quiet); CHECK(!re.ok()); }
   1059   {
   1060     RE2 re("a[x", RE2::Quiet);
   1061     CHECK(!re.ok());
   1062   }
   1063   {
   1064     RE2 re("a[z-a]", RE2::Quiet);
   1065     CHECK(!re.ok());
   1066   }
   1067   {
   1068     RE2 re("a[[:foobar:]]", RE2::Quiet);
   1069     CHECK(!re.ok());
   1070   }
   1071   {
   1072     RE2 re("a(b", RE2::Quiet);
   1073     CHECK(!re.ok());
   1074   }
   1075   {
   1076     RE2 re("a\\", RE2::Quiet);
   1077     CHECK(!re.ok());
   1078   }
   1079 }
   1080 
   1081 TEST(RE2, NoCrash) {
   1082   // Test that using a bad regexp doesn't crash.
   1083   {
   1084     RE2 re("a\\", RE2::Quiet);
   1085     CHECK(!re.ok());
   1086     CHECK(!RE2::PartialMatch("a\\b", re));
   1087   }
   1088 
   1089   // Test that using an enormous regexp doesn't crash
   1090   {
   1091     RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
   1092     CHECK(!re.ok());
   1093     CHECK(!RE2::PartialMatch("aaa", re));
   1094   }
   1095 
   1096   // Test that a crazy regexp still compiles and runs.
   1097   {
   1098     RE2 re(".{512}x", RE2::Quiet);
   1099     CHECK(re.ok());
   1100     string s;
   1101     s.append(515, 'c');
   1102     s.append("x");
   1103     CHECK(RE2::PartialMatch(s, re));
   1104   }
   1105 }
   1106 
   1107 TEST(RE2, Recursion) {
   1108   // Test that recursion is stopped.
   1109   // This test is PCRE-legacy -- there's no recursion in RE2.
   1110   int bytes = 15 * 1024;  // enough to crash PCRE
   1111   TestRecursion(bytes, ".");
   1112   TestRecursion(bytes, "a");
   1113   TestRecursion(bytes, "a.");
   1114   TestRecursion(bytes, "ab.");
   1115   TestRecursion(bytes, "abc.");
   1116 }
   1117 
   1118 TEST(RE2, BigCountedRepetition) {
   1119   // Test that counted repetition works, given tons of memory.
   1120   RE2::Options opt;
   1121   opt.set_max_mem(256<<20);
   1122 
   1123   RE2 re(".{512}x", opt);
   1124   CHECK(re.ok());
   1125   string s;
   1126   s.append(515, 'c');
   1127   s.append("x");
   1128   CHECK(RE2::PartialMatch(s, re));
   1129 }
   1130 
   1131 TEST(RE2, DeepRecursion) {
   1132   // Test for deep stack recursion.  This would fail with a
   1133   // segmentation violation due to stack overflow before pcre was
   1134   // patched.
   1135   // Again, a PCRE legacy test.  RE2 doesn't recurse.
   1136   string comment("x*");
   1137   string a(131072, 'a');
   1138   comment += a;
   1139   comment += "*x";
   1140   RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
   1141   CHECK(RE2::FullMatch(comment, re));
   1142 }
   1143 
   1144 // Suggested by Josh Hyman.  Failed when SearchOnePass was
   1145 // not implementing case-folding.
   1146 TEST(CaseInsensitive, MatchAndConsume) {
   1147   string result;
   1148   string text = "A fish named *Wanda*";
   1149   StringPiece sp(text);
   1150 
   1151   EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
   1152   EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
   1153 }
   1154 
   1155 // RE2 should permit implicit conversions from string, StringPiece, const char*,
   1156 // and C string literals.
   1157 TEST(RE2, ImplicitConversions) {
   1158   string re_string(".");
   1159   StringPiece re_stringpiece(".");
   1160   const char* re_cstring = ".";
   1161   EXPECT_TRUE(RE2::PartialMatch("e", re_string));
   1162   EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
   1163   EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
   1164   EXPECT_TRUE(RE2::PartialMatch("e", "."));
   1165 }
   1166 
   1167 // Bugs introduced by 8622304
   1168 TEST(RE2, CL8622304) {
   1169   // reported by ingow
   1170   string dir;
   1171   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])"));  // ok
   1172   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir));  // fails
   1173 
   1174   // reported by jacobsa
   1175   string key, val;
   1176   EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
   1177               "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
   1178               &key,
   1179               &val));
   1180   EXPECT_EQ(key, "bar");
   1181   EXPECT_EQ(val, "1,0x2F,030,4,5");
   1182 }
   1183 
   1184 
   // Table for checking that RE2 reports the right piece of the regexp when
   // compilation fails.  In particular it must return whole runes, must
   // always report invalid UTF-8, and the piece reported for a bad Perl flag
   // group must be big enough.
   struct ErrorTest {
     const char* regexp;  // pattern that is expected to fail to compile
     const char* error;   // expected value of error_arg()
   };
   static ErrorTest error_tests[] = {
     { "ab\\cd", "\\" },
     { "ef\\x01", "\\x0" },
     { "gh\\x101", "\\x1" },
     { "ij\\x1", "\\x1" },
     { "kl\\x", "\\x" },
     { "uv\\x{0000}", "\\x{0000" },
     { "wx\\p{ABC", "\\p{ABC" },
     // This used to return "(?s", but the flag in error is the X.
     { "yz(?smiUX:abc)", "(?smiUX" },
     { "aa(?smi", "(?sm" },
     { "bb[abc", "[abc" },

     // No argument string is returned for invalid UTF-8.
     { "mn\\x1\377", "" },
     { "op\377qr", "" },
     { "st\\x{00000\377", "" },
     { "zz\\p{\377}", "" },
     { "zz\\x{00\377}", "" },
     { "zz(?P<name\377>abc)", "" },
   };
   1211 TEST(RE2, ErrorArgs) {
   1212   for (int i = 0; i < arraysize(error_tests); i++) {
   1213     RE2 re(error_tests[i].regexp, RE2::Quiet);
   1214     EXPECT_FALSE(re.ok());
   1215     EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
   1216   }
   1217 }
   1218 
   // Table for checking that "never match \n" mode never matches \n.
   struct NeverTest {
     const char* regexp;  // pattern to compile
     const char* text;    // text to search
     const char* match;   // expected first capture, or NULL if no match is expected
   };
   static NeverTest never_tests[] = {
     { "(.*)",            "abc\ndef\nghi\n",  "abc" },
     { "(?s)(abc.*def)",  "abc\ndef\n",       NULL },
     { "(abc(.|\n)*def)", "abc\ndef\n",       NULL },
     { "(abc[^x]*def)",   "abc\ndef\n",       NULL },
     { "(abc[^x]*def)",   "abczzzdef\ndef\n", "abczzzdef" },
   };
   1231 TEST(RE2, NeverNewline) {
   1232   RE2::Options opt;
   1233   opt.set_never_nl(true);
   1234   for (int i = 0; i < arraysize(never_tests); i++) {
   1235     const NeverTest& t = never_tests[i];
   1236     RE2 re(t.regexp, opt);
   1237     if (t.match == NULL) {
   1238       EXPECT_FALSE(re.PartialMatch(t.text, re));
   1239     } else {
   1240       StringPiece m;
   1241       EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
   1242       EXPECT_EQ(m, t.match);
   1243     }
   1244   }
   1245 }
   1246 
   1247 // Check that there are no capturing groups in "never capture" mode.
   1248 TEST(RE2, NeverCapture) {
   1249   RE2::Options opt;
   1250   opt.set_never_capture(true);
   1251   RE2 re("(r)(e)", opt);
   1252   EXPECT_EQ(0, re.NumberOfCapturingGroups());
   1253 }
   1254 
   1255 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
   1256 // Triggered by a failed DFA search falling back to Bitstate when
   1257 // using Match with a NULL submatch set.  Bitstate tried to read
   1258 // the submatch[0] entry even if nsubmatch was 0.
   1259 TEST(RE2, BitstateCaptureBug) {
   1260   RE2::Options opt;
   1261   opt.set_max_mem(20000);
   1262   RE2 re("(_________$)", opt);
   1263   StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
   1264   EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
   1265 }
   1266 
   1267 // C++ version of bug 609710.
   1268 TEST(RE2, UnicodeClasses) {
   1269   const string str = "ABCDEFGHI";
   1270   string a, b, c;
   1271 
   1272   EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
   1273   EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
   1274   EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
   1275   EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
   1276   EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
   1277   EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
   1278 
   1279   EXPECT_TRUE(RE2::FullMatch("", "\\p{L}"));
   1280   EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}"));
   1281   EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}"));
   1282   EXPECT_FALSE(RE2::FullMatch("", "\\P{L}"));
   1283   EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}"));
   1284   EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}"));
   1285 
   1286   EXPECT_TRUE(RE2::FullMatch("", "\\p{L}"));
   1287   EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}"));
   1288   EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}"));
   1289   EXPECT_FALSE(RE2::FullMatch("", "\\P{L}"));
   1290   EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}"));
   1291   EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}"));
   1292 
   1293   EXPECT_TRUE(RE2::FullMatch("", "\\p{L}"));
   1294   EXPECT_FALSE(RE2::FullMatch("", "\\p{Lu}"));
   1295   EXPECT_FALSE(RE2::FullMatch("", "\\p{Ll}"));
   1296   EXPECT_FALSE(RE2::FullMatch("", "\\P{L}"));
   1297   EXPECT_TRUE(RE2::FullMatch("", "\\P{Lu}"));
   1298   EXPECT_TRUE(RE2::FullMatch("", "\\P{Ll}"));
   1299 
   1300   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
   1301   EXPECT_EQ("A", a);
   1302   EXPECT_EQ("B", b);
   1303   EXPECT_EQ("C", c);
   1304 
   1305   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
   1306   EXPECT_EQ("A", a);
   1307   EXPECT_EQ("B", b);
   1308   EXPECT_EQ("C", c);
   1309 
   1310   EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
   1311 
   1312   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
   1313   EXPECT_EQ("A", a);
   1314   EXPECT_EQ("B", b);
   1315   EXPECT_EQ("C", c);
   1316 
   1317   EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
   1318 
   1319   EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
   1320   EXPECT_EQ("", a);
   1321   EXPECT_EQ("", b);
   1322   EXPECT_EQ("", c);
   1323 }
   1324 
   1325 // Bug reported by saito. 2009/02/17
   1326 TEST(RE2, NullVsEmptyString) {
   1327   RE2 re2(".*");
   1328   StringPiece v1("");
   1329   EXPECT_TRUE(RE2::FullMatch(v1, re2));
   1330 
   1331   StringPiece v2;
   1332   EXPECT_TRUE(RE2::FullMatch(v2, re2));
   1333 }
   1334 
   1335 // Issue 1816809
   1336 TEST(RE2, Bug1816809) {
   1337   RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
   1338   StringPiece piece("llx-3;llx4");
   1339   string x;
   1340   EXPECT_TRUE(RE2::Consume(&piece, re, &x));
   1341 }
   1342 
   1343 // Issue 3061120
   1344 TEST(RE2, Bug3061120) {
   1345   RE2 re("(?i)\\W");
   1346   EXPECT_FALSE(RE2::PartialMatch("x", re));  // always worked
   1347   EXPECT_FALSE(RE2::PartialMatch("k", re));  // broke because of kelvin
   1348   EXPECT_FALSE(RE2::PartialMatch("s", re));  // broke because of latin long s
   1349 }
   1350 
   1351 TEST(RE2, CapturingGroupNames) {
   1352   // Opening parentheses annotated with group IDs:
   1353   //      12    3        45   6         7
   1354   RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
   1355   EXPECT_TRUE(re.ok());
   1356   const map<int, string>& have = re.CapturingGroupNames();
   1357   map<int, string> want;
   1358   want[3] = "G2";
   1359   want[6] = "G2";
   1360   want[7] = "G1";
   1361   EXPECT_EQ(want, have);
   1362 }
   1363 
   1364 TEST(RE2, RegexpToStringLossOfAnchor) {
   1365   EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
   1366   EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
   1367   EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
   1368   EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
   1369 }
   1370 
   1371 }  // namespace re2
   1372