Home | History | Annotate | Download | only in testing
      1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // Test character class manipulations.
      6 
      7 #include "util/test.h"
      8 #include "re2/regexp.h"
      9 
     10 namespace re2 {
     11 
     12 struct CCTest {
     13   struct {
     14     Rune lo;
     15     Rune hi;
     16   } add[10];
     17   int remove;
     18   struct {
     19     Rune lo;
     20     Rune hi;
     21   } final[10];
     22 };
     23 
     24 static CCTest tests[] = {
     25   { { { 10, 20 }, {-1} }, -1,
     26     { { 10, 20 }, {-1} } },
     27 
     28   { { { 10, 20 }, { 20, 30 }, {-1} }, -1,
     29     { { 10, 30 }, {-1} } },
     30 
     31   { { { 10, 20 }, { 30, 40 }, { 20, 30 }, {-1} }, -1,
     32     { { 10, 40 }, {-1} } },
     33 
     34   { { { 0, 50 }, { 20, 30 }, {-1} }, -1,
     35     { { 0, 50 }, {-1} } },
     36 
     37   { { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} }, -1,
     38     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
     39 
     40   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
     41     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
     42 
     43   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, {-1} }, -1,
     44     { { 10, 11 }, { 13, 14 }, { 16, 17 }, { 19, 20 }, { 22, 23 }, {-1} } },
     45 
     46   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 5, 25 }, {-1} }, -1,
     47     { { 5, 25 }, {-1} } },
     48 
     49   { { { 13, 14 }, { 10, 11 }, { 22, 23 }, { 19, 20 }, { 16, 17 }, { 12, 21 }, {-1} }, -1,
     50     { { 10, 23 }, {-1} } },
     51 
     52   // These check boundary cases during negation.
     53   { { { 0, Runemax }, {-1} }, -1,
     54     { { 0, Runemax }, {-1} } },
     55 
     56   { { { 0, 50 }, {-1} }, -1,
     57     { { 0, 50 }, {-1} } },
     58 
     59   { { { 50, Runemax }, {-1} }, -1,
     60     { { 50, Runemax }, {-1} } },
     61 
     62   // Check RemoveAbove.
     63   { { { 50, Runemax }, {-1} }, 255,
     64     { { 50, 255 }, {-1} } },
     65 
     66   { { { 50, Runemax }, {-1} }, 65535,
     67     { { 50, 65535 }, {-1} } },
     68 
     69   { { { 50, Runemax }, {-1} }, Runemax,
     70     { { 50, Runemax }, {-1} } },
     71 
     72   { { { 50, 60 }, { 250, 260 }, { 350, 360 }, {-1} }, 255,
     73     { { 50, 60 }, { 250, 255 }, {-1} } },
     74 
     75   { { { 50, 60 }, {-1} }, 255,
     76     { { 50, 60 }, {-1} } },
     77 
     78   { { { 350, 360 }, {-1} }, 255,
     79     { {-1} } },
     80 
     81   { { {-1} }, 255,
     82     { {-1} } },
     83 };
     84 
     85 template<class CharClass>
     86 static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
     87   if (t == NULL) {
     88     printf("\t%s:", desc);
     89   } else {
     90     printf("\n");
     91     printf("CharClass added: [%s]", desc);
     92     for (int k = 0; t->add[k].lo >= 0; k++)
     93       printf(" %d-%d", t->add[k].lo, t->add[k].hi);
     94     printf("\n");
     95     if (t->remove >= 0)
     96       printf("Removed > %d\n", t->remove);
     97     printf("\twant:");
     98     for (int k = 0; t->final[k].lo >= 0; k++)
     99       printf(" %d-%d", t->final[k].lo, t->final[k].hi);
    100     printf("\n");
    101     printf("\thave:");
    102   }
    103 
    104   for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
    105     printf(" %d-%d", it->lo, it->hi);
    106   printf("\n");
    107 }
    108 
    109 bool ShouldContain(CCTest *t, int x) {
    110   for (int j = 0; t->final[j].lo >= 0; j++)
    111     if (t->final[j].lo <= x && x <= t->final[j].hi)
    112       return true;
    113   return false;
    114 }
    115 
    116 // Helpers to make templated CorrectCC work with both CharClass and CharClassBuilder.
    117 
    118 CharClass* Negate(CharClass *cc) {
    119   return cc->Negate();
    120 }
    121 
    122 void Delete(CharClass* cc) {
    123   cc->Delete();
    124 }
    125 
    126 CharClassBuilder* Negate(CharClassBuilder* cc) {
    127   CharClassBuilder* ncc = cc->Copy();
    128   ncc->Negate();
    129   return ncc;
    130 }
    131 
    132 void Delete(CharClassBuilder* cc) {
    133   delete cc;
    134 }
    135 
    136 template<class CharClass>
    137 bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
    138   typename CharClass::iterator it = cc->begin();
    139   int size = 0;
    140   for (int j = 0; t->final[j].lo >= 0; j++, ++it) {
    141     if (it == cc->end() ||
    142         it->lo != t->final[j].lo ||
    143         it->hi != t->final[j].hi) {
    144       Broke(desc, t, cc);
    145       return false;
    146     }
    147     size += it->hi - it->lo + 1;
    148   }
    149   if (it != cc->end()) {
    150     Broke(desc, t, cc);
    151     return false;
    152   }
    153   if (cc->size() != size) {
    154     Broke(desc, t, cc);
    155     printf("wrong size: want %d have %d\n", size, cc->size());
    156     return false;
    157   }
    158 
    159   for (int j = 0; j < 101; j++) {
    160     if (j == 100)
    161       j = Runemax;
    162     if (ShouldContain(t, j) != cc->Contains(j)) {
    163       Broke(desc, t, cc);
    164       printf("want contains(%d)=%d, got %d\n",
    165              j, ShouldContain(t, j), cc->Contains(j));
    166       return false;
    167     }
    168   }
    169 
    170   CharClass* ncc = Negate(cc);
    171   for (int j = 0; j < 101; j++) {
    172     if (j == 100)
    173       j = Runemax;
    174     if (ShouldContain(t, j) == ncc->Contains(j)) {
    175       Broke(desc, t, cc);
    176       Broke("ncc", NULL, ncc);
    177       printf("want ncc contains(%d)!=%d, got %d\n",
    178              j, ShouldContain(t, j), ncc->Contains(j));
    179       Delete(ncc);
    180       return false;
    181     }
    182     if (ncc->size() != Runemax+1 - cc->size()) {
    183       Broke(desc, t, cc);
    184       Broke("ncc", NULL, ncc);
    185       printf("ncc size should be %d is %d\n",
    186              Runemax+1 - cc->size(), ncc->size());
    187       Delete(ncc);
    188       return false;
    189     }
    190   }
    191   Delete(ncc);
    192   return true;
    193 }
    194 
    195 TEST(TestCharClassBuilder, Adds) {
    196   int nfail = 0;
    197   for (int i = 0; i < arraysize(tests); i++) {
    198     CharClassBuilder ccb;
    199     CCTest* t = &tests[i];
    200     for (int j = 0; t->add[j].lo >= 0; j++)
    201       ccb.AddRange(t->add[j].lo, t->add[j].hi);
    202     if (t->remove >= 0)
    203       ccb.RemoveAbove(t->remove);
    204     if (!CorrectCC(&ccb, t, "before copy (CharClassBuilder)"))
    205       nfail++;
    206     CharClass* cc = ccb.GetCharClass();
    207     if (!CorrectCC(cc, t, "before copy (CharClass)"))
    208       nfail++;
    209     cc->Delete();
    210 
    211     CharClassBuilder *ccb1 = ccb.Copy();
    212     if (!CorrectCC(ccb1, t, "after copy (CharClassBuilder)"))
    213       nfail++;
    214     cc = ccb.GetCharClass();
    215     if (!CorrectCC(cc, t, "after copy (CharClass)"))
    216       nfail++;
    217     cc->Delete();
    218     delete ccb1;
    219   }
    220   EXPECT_EQ(nfail, 0);
    221 }
    222 
    223 }  // namespace re2
    224