Home | History | Annotate | Download | only in testing
      1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Exhaustive testing of regular expression matching.
      6 
      7 #include "util/test.h"
      8 #include "re2/testing/exhaustive_tester.h"
      9 
     10 namespace re2 {
     11 
     12 // Test simple character classes by themselves.
     13 TEST(CharacterClasses, Exhaustive) {
     14   vector<string> atoms = Split(" ",
     15     "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
     16   ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
     17                  5, Explode("ab"), "", "");
     18 }
     19 
     20 // Test simple character classes inside a___b (for example, a[a]b).
     21 TEST(CharacterClasses, ExhaustiveAB) {
     22   vector<string> atoms = Split(" ",
     23     "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
     24   ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
     25                  5, Explode("ab"), "a%sb", "");
     26 }
     27 
     28 // Returns UTF8 for Rune r
     29 static string UTF8(Rune r) {
     30   char buf[UTFmax+1];
     31   buf[runetochar(buf, &r)] = 0;
     32   return string(buf);
     33 }
     34 
     35 // Returns a vector of "interesting" UTF8 characters.
     36 // Unicode is now too big to just return all of them,
     37 // so UTF8Characters return a set likely to be good test cases.
     38 static const vector<string>& InterestingUTF8() {
     39   static bool init;
     40   static vector<string> v;
     41 
     42   if (init)
     43     return v;
     44 
     45   init = true;
     46   // All the Latin1 equivalents are interesting.
     47   for (int i = 1; i < 256; i++)
     48     v.push_back(UTF8(i));
     49 
     50   // After that, the codes near bit boundaries are
     51   // interesting, because they span byte sequence lengths.
     52   for (int j = 0; j < 8; j++)
     53     v.push_back(UTF8(256 + j));
     54   for (int i = 512; i < Runemax; i <<= 1)
     55     for (int j = -8; j < 8; j++)
     56       v.push_back(UTF8(i + j));
     57 
     58   // The codes near Runemax, including Runemax itself, are interesting.
     59   for (int j = -8; j <= 0; j++)
     60     v.push_back(UTF8(Runemax + j));
     61 
     62   return v;
     63 }
     64 
     65 // Test interesting UTF-8 characters against character classes.
     66 TEST(InterestingUTF8, SingleOps) {
     67   vector<string> atoms = Split(" ",
     68     ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
     69     "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
     70     "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
     71     "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
     72   vector<string> ops;  // no ops
     73   ExhaustiveTest(1, 0, atoms, ops,
     74                  1, InterestingUTF8(), "", "");
     75 }
     76 
     77 // Test interesting UTF-8 characters against character classes,
     78 // but wrap everything inside AB.
     79 TEST(InterestingUTF8, AB) {
     80   vector<string> atoms = Split(" ",
     81     ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
     82     "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
     83     "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
     84     "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
     85   vector<string> ops;  // no ops
     86   vector<string> alpha = InterestingUTF8();
     87   for (int i = 0; i < alpha.size(); i++)
     88     alpha[i] = "a" + alpha[i] + "b";
     89   ExhaustiveTest(1, 0, atoms, ops,
     90                  1, alpha, "a%sb", "");
     91 }
     92 
     93 }  // namespace re2
     94 
     95