Home | History | Annotate | Download | only in testing
      1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Exhaustive testing of regular expression matching.
      6 
      7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
      8 // a maximum regular expression length, and a maximum number of letters
      9 // that can appear in the regular expression.  Given these parameters,
     10 // it tries every possible regular expression and string, verifying that
     11 // the NFA, DFA, and a trivial backtracking implementation agree about
     12 // the location of the match.
     13 
     14 #include <stdlib.h>
     15 #include <stdio.h>
     16 
     17 #ifndef LOGGING
     18 #define LOGGING 0
     19 #endif
     20 
     21 #include "util/test.h"
     22 #include "re2/testing/exhaustive_tester.h"
     23 #include "re2/testing/tester.h"
     24 
     25 DEFINE_bool(show_regexps, false, "show regexps during testing");
     26 
     27 DEFINE_int32(max_bad_regexp_inputs, 1,
     28              "Stop testing a regular expression after finding this many "
     29              "strings that break it.");
     30 
     31 // Compiled in debug mode, the usual tests run for over an hour.
     32 // Have to cut it down to make the unit test machines happy.
     33 DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
     34 
     35 namespace re2 {
     36 
     37 static char* escape(const StringPiece& sp) {
     38   static char buf[512];
     39   char* p = buf;
     40   *p++ = '\"';
     41   for (int i = 0; i < sp.size(); i++) {
     42     if(p+5 >= buf+sizeof buf)
     43       LOG(FATAL) << "ExhaustiveTester escape: too long";
     44     if(sp[i] == '\\' || sp[i] == '\"') {
     45       *p++ = '\\';
     46       *p++ = sp[i];
     47     } else if(sp[i] == '\n') {
     48       *p++ = '\\';
     49       *p++ = 'n';
     50     } else {
     51       *p++ = sp[i];
     52     }
     53   }
     54   *p++ = '\"';
     55   *p = '\0';
     56   return buf;
     57 }
     58 
     59 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
     60   if (!re.Match(input, 0, input.size(), anchor, m, n)) {
     61     printf("-");
     62     return;
     63   }
     64   for (int i = 0; i < n; i++) {
     65     if (i > 0)
     66       printf(" ");
     67     if (m[i].begin() == NULL)
     68       printf("-");
     69     else
     70       printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
     71   }
     72 }
     73 
     74 // Processes a single generated regexp.
     75 // Compiles it using Regexp interface and PCRE, and then
     76 // checks that NFA, DFA, and PCRE all return the same results.
     77 void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
     78   regexps_++;
     79   string regexp = const_regexp;
     80   if (!topwrapper_.empty())
     81     regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
     82 
     83   if (FLAGS_show_regexps) {
     84     printf("\r%s", regexp.c_str());
     85     fflush(stdout);
     86   }
     87 
     88   if (LOGGING) {
     89     // Write out test cases and answers for use in testing
     90     // other implementations, such as Go's regexp package.
     91     if (randomstrings_)
     92       LOG(ERROR) << "Cannot log with random strings.";
     93     if (regexps_ == 1) {  // first
     94       printf("strings\n");
     95       strgen_.Reset();
     96       while (strgen_.HasNext())
     97         printf("%s\n", escape(strgen_.Next()));
     98       printf("regexps\n");
     99     }
    100     printf("%s\n", escape(regexp));
    101 
    102     RE2 re(regexp);
    103     RE2::Options longest;
    104     longest.set_longest_match(true);
    105     RE2 relongest(regexp, longest);
    106     int ngroup = re.NumberOfCapturingGroups()+1;
    107     StringPiece* group = new StringPiece[ngroup];
    108 
    109     strgen_.Reset();
    110     while (strgen_.HasNext()) {
    111       StringPiece input = strgen_.Next();
    112       PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
    113       printf(";");
    114       PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
    115       printf(";");
    116       PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
    117       printf(";");
    118       PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
    119       printf("\n");
    120     }
    121     delete[] group;
    122     return;
    123   }
    124 
    125   Tester tester(regexp);
    126   if (tester.error())
    127     return;
    128 
    129   strgen_.Reset();
    130   strgen_.GenerateNULL();
    131   if (randomstrings_)
    132     strgen_.Random(stringseed_, stringcount_);
    133   int bad_inputs = 0;
    134   while (strgen_.HasNext()) {
    135     tests_++;
    136     if (!tester.TestInput(strgen_.Next())) {
    137       failures_++;
    138       if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
    139         break;
    140     }
    141   }
    142 }
    143 
    144 // Runs an exhaustive test on the given parameters.
    145 void ExhaustiveTest(int maxatoms, int maxops,
    146                     const vector<string>& alphabet,
    147                     const vector<string>& ops,
    148                     int maxstrlen, const vector<string>& stralphabet,
    149                     const string& wrapper,
    150                     const string& topwrapper) {
    151   if (DEBUG_MODE && FLAGS_quick_debug_mode) {
    152     if (maxatoms > 1)
    153       maxatoms--;
    154     if (maxops > 1)
    155       maxops--;
    156     if (maxstrlen > 1)
    157       maxstrlen--;
    158   }
    159   ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
    160                      maxstrlen, stralphabet, wrapper,
    161                      topwrapper);
    162   t.Generate();
    163   if (!LOGGING) {
    164     printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
    165            t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
    166   }
    167   EXPECT_EQ(0, t.failures());
    168 }
    169 
    170 // Runs an exhaustive test using the given parameters and
    171 // the basic egrep operators.
    172 void EgrepTest(int maxatoms, int maxops, const string& alphabet,
    173                int maxstrlen, const string& stralphabet,
    174                const string& wrapper) {
    175   const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
    176 
    177   for (int i = 0; i < arraysize(tops); i++) {
    178     ExhaustiveTest(maxatoms, maxops,
    179                    Split("", alphabet),
    180                    RegexpGenerator::EgrepOps(),
    181                    maxstrlen,
    182                    Split("", stralphabet),
    183                    wrapper,
    184                    tops[i]);
    185   }
    186 }
    187 
    188 }  // namespace re2
    189