Home | History | Annotate | Download | only in testing
      1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Dump the regexp into a string showing structure.
      6 // Tested by parse_unittest.cc
      7 
      8 // This function traverses the regexp recursively,
      9 // meaning that on inputs like Regexp::Simplify of
     10 // a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
     11 // it takes time and space exponential in the size of the
     12 // original regular expression.  It can also use stack space
     13 // linear in the size of the regular expression for inputs
     14 // like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
     15 // IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
     16 // As a result, Dump is provided only in the testing
     17 // library (see BUILD).
     18 
     19 #include <string>
     20 #include <vector>
     21 #include "util/test.h"
     22 #include "re2/stringpiece.h"
     23 #include "re2/regexp.h"
     24 
     25 // Cause a link error if this file is used outside of testing.
     26 DECLARE_string(test_tmpdir);
     27 
     28 namespace re2 {
     29 
     30 static const char* kOpcodeNames[] = {
     31   "bad",
     32   "no",
     33   "emp",
     34   "lit",
     35   "str",
     36   "cat",
     37   "alt",
     38   "star",
     39   "plus",
     40   "que",
     41   "rep",
     42   "cap",
     43   "dot",
     44   "byte",
     45   "bol",
     46   "eol",
     47   "wb",   // kRegexpWordBoundary
     48   "nwb",  // kRegexpNoWordBoundary
     49   "bot",
     50   "eot",
     51   "cc",
     52   "match",
     53 };
     54 
     55 // Create string representation of regexp with explicit structure.
     56 // Nothing pretty, just for testing.
     57 static void DumpRegexpAppending(Regexp* re, string* s) {
     58   if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
     59     StringAppendF(s, "op%d", re->op());
     60   } else {
     61     switch (re->op()) {
     62       default:
     63         break;
     64       case kRegexpStar:
     65       case kRegexpPlus:
     66       case kRegexpQuest:
     67       case kRegexpRepeat:
     68         if (re->parse_flags() & Regexp::NonGreedy)
     69           s->append("n");
     70         break;
     71     }
     72     s->append(kOpcodeNames[re->op()]);
     73     if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
     74       Rune r = re->rune();
     75       if ('a' <= r && r <= 'z')
     76         s->append("fold");
     77     }
     78     if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
     79       for (int i = 0; i < re->nrunes(); i++) {
     80         Rune r = re->runes()[i];
     81         if ('a' <= r && r <= 'z') {
     82           s->append("fold");
     83           break;
     84         }
     85       }
     86     }
     87   }
     88   s->append("{");
     89   switch (re->op()) {
     90     default:
     91       break;
     92     case kRegexpEndText:
     93       if (!(re->parse_flags() & Regexp::WasDollar)) {
     94         s->append("\\z");
     95       }
     96       break;
     97     case kRegexpLiteral: {
     98       Rune r = re->rune();
     99       char buf[UTFmax+1];
    100       buf[runetochar(buf, &r)] = 0;
    101       s->append(buf);
    102       break;
    103     }
    104     case kRegexpLiteralString:
    105       for (int i = 0; i < re->nrunes(); i++) {
    106         Rune r = re->runes()[i];
    107         char buf[UTFmax+1];
    108         buf[runetochar(buf, &r)] = 0;
    109         s->append(buf);
    110       }
    111       break;
    112     case kRegexpConcat:
    113     case kRegexpAlternate:
    114       for (int i = 0; i < re->nsub(); i++)
    115         DumpRegexpAppending(re->sub()[i], s);
    116       break;
    117     case kRegexpStar:
    118     case kRegexpPlus:
    119     case kRegexpQuest:
    120       DumpRegexpAppending(re->sub()[0], s);
    121       break;
    122     case kRegexpCapture:
    123       if (re->name()) {
    124         s->append(*re->name());
    125         s->append(":");
    126       }
    127       DumpRegexpAppending(re->sub()[0], s);
    128       break;
    129     case kRegexpRepeat:
    130       s->append(StringPrintf("%d,%d ", re->min(), re->max()));
    131       DumpRegexpAppending(re->sub()[0], s);
    132       break;
    133     case kRegexpCharClass: {
    134       string sep;
    135       for (CharClass::iterator it = re->cc()->begin();
    136            it != re->cc()->end(); ++it) {
    137         RuneRange rr = *it;
    138         s->append(sep);
    139         if (rr.lo == rr.hi)
    140           s->append(StringPrintf("%#x", rr.lo));
    141         else
    142           s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
    143         sep = " ";
    144       }
    145       break;
    146     }
    147   }
    148   s->append("}");
    149 }
    150 
    151 string Regexp::Dump() {
    152   string s;
    153 
    154   // Make sure being called from a unit test.
    155   if (FLAGS_test_tmpdir.empty()) {
    156     LOG(ERROR) << "Cannot use except for testing.";
    157     return s;
    158   }
    159 
    160   DumpRegexpAppending(this, &s);
    161   return s;
    162 }
    163 
    164 }  // namespace re2
    165