Home | History | Annotate | Download | only in intltest
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  bytetrietest.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010nov16
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include <string.h>
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/bytestrie.h"
     21 #include "unicode/bytestriebuilder.h"
     22 #include "unicode/localpointer.h"
     23 #include "unicode/stringpiece.h"
     24 #include "intltest.h"
     25 #include "cmemory.h"
     26 
     27 struct StringAndValue {
     28     const char *s;
     29     int32_t value;
     30 };
     31 
     32 class BytesTrieTest : public IntlTest {
     33 public:
     34     BytesTrieTest();
     35     virtual ~BytesTrieTest();
     36 
     37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     38     void TestBuilder();
     39     void TestEmpty();
     40     void Test_a();
     41     void Test_a_ab();
     42     void TestShortestBranch();
     43     void TestBranches();
     44     void TestLongSequence();
     45     void TestLongBranch();
     46     void TestValuesForState();
     47     void TestCompact();
     48 
     49     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     50     void TestHasUniqueValue();
     51     void TestGetNextBytes();
     52     void TestIteratorFromBranch();
     53     void TestIteratorFromLinearMatch();
     54     void TestTruncatingIteratorFromRoot();
     55     void TestTruncatingIteratorFromLinearMatchShort();
     56     void TestTruncatingIteratorFromLinearMatchLong();
     57     void TestIteratorFromBytes();
     58     void TestFailedIterator();
     59 
     60     void checkData(const StringAndValue data[], int32_t dataLength);
     61     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     62     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     63                          UStringTrieBuildOption buildOption);
     64     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     65     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     66     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     67     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     68     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     69     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     70 
     71 private:
     72     BytesTrieBuilder *builder_;
     73 };
     74 
     75 extern IntlTest *createBytesTrieTest() {
     76     return new BytesTrieTest();
     77 }
     78 
     79 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
     80     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
     81     builder_=new BytesTrieBuilder(errorCode);
     82 }
     83 
     84 BytesTrieTest::~BytesTrieTest() {
     85     delete builder_;
     86 }
     87 
     88 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     89     if(exec) {
     90         logln("TestSuite BytesTrieTest: ");
     91     }
     92     TESTCASE_AUTO_BEGIN;
     93     TESTCASE_AUTO(TestBuilder);
     94     TESTCASE_AUTO(TestEmpty);
     95     TESTCASE_AUTO(Test_a);
     96     TESTCASE_AUTO(Test_a_ab);
     97     TESTCASE_AUTO(TestShortestBranch);
     98     TESTCASE_AUTO(TestBranches);
     99     TESTCASE_AUTO(TestLongSequence);
    100     TESTCASE_AUTO(TestLongBranch);
    101     TESTCASE_AUTO(TestValuesForState);
    102     TESTCASE_AUTO(TestCompact);
    103     TESTCASE_AUTO(TestHasUniqueValue);
    104     TESTCASE_AUTO(TestGetNextBytes);
    105     TESTCASE_AUTO(TestIteratorFromBranch);
    106     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    107     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    108     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    109     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    110     TESTCASE_AUTO(TestIteratorFromBytes);
    111     TESTCASE_AUTO(TestFailedIterator);
    112     TESTCASE_AUTO_END;
    113 }
    114 
    115 void BytesTrieTest::TestBuilder() {
    116     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    117     builder_->clear();
    118     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    119     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    120         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    121         return;
    122     }
    123     // TODO: remove .build(...) once add() checks for duplicates.
    124     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    125     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    126         errln("BytesTrieBuilder.add() did not detect duplicates");
    127         return;
    128     }
    129 }
    130 
    131 void BytesTrieTest::TestEmpty() {
    132     static const StringAndValue data[]={
    133         { "", 0 }
    134     };
    135     checkData(data, UPRV_LENGTHOF(data));
    136 }
    137 
    138 void BytesTrieTest::Test_a() {
    139     static const StringAndValue data[]={
    140         { "a", 1 }
    141     };
    142     checkData(data, UPRV_LENGTHOF(data));
    143 }
    144 
    145 void BytesTrieTest::Test_a_ab() {
    146     static const StringAndValue data[]={
    147         { "a", 1 },
    148         { "ab", 100 }
    149     };
    150     checkData(data, UPRV_LENGTHOF(data));
    151 }
    152 
    153 void BytesTrieTest::TestShortestBranch() {
    154     static const StringAndValue data[]={
    155         { "a", 1000 },
    156         { "b", 2000 }
    157     };
    158     checkData(data, UPRV_LENGTHOF(data));
    159 }
    160 
    161 void BytesTrieTest::TestBranches() {
    162     static const StringAndValue data[]={
    163         { "a", 0x10 },
    164         { "cc", 0x40 },
    165         { "e", 0x100 },
    166         { "ggg", 0x400 },
    167         { "i", 0x1000 },
    168         { "kkkk", 0x4000 },
    169         { "n", 0x10000 },
    170         { "ppppp", 0x40000 },
    171         { "r", 0x100000 },
    172         { "sss", 0x200000 },
    173         { "t", 0x400000 },
    174         { "uu", 0x800000 },
    175         { "vv", 0x7fffffff },
    176         { "zz", (int32_t)0x80000000 }
    177     };
    178     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
    179         logln("TestBranches length=%d", (int)length);
    180         checkData(data, length);
    181     }
    182 }
    183 
    184 void BytesTrieTest::TestLongSequence() {
    185     static const StringAndValue data[]={
    186         { "a", -1 },
    187         // sequence of linear-match nodes
    188         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    189         // more than 256 bytes
    190         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    191           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    192           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    193           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    194           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    195           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    196     };
    197     checkData(data, UPRV_LENGTHOF(data));
    198 }
    199 
    200 void BytesTrieTest::TestLongBranch() {
    201     // Split-branch and interesting compact-integer values.
    202     static const StringAndValue data[]={
    203         { "a", -2 },
    204         { "b", -1 },
    205         { "c", 0 },
    206         { "d2", 1 },
    207         { "f", 0x3f },
    208         { "g", 0x40 },
    209         { "h", 0x41 },
    210         { "j23", 0x1900 },
    211         { "j24", 0x19ff },
    212         { "j25", 0x1a00 },
    213         { "k2", 0x1a80 },
    214         { "k3", 0x1aff },
    215         { "l234567890", 0x1b00 },
    216         { "l234567890123", 0x1b01 },
    217         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    218         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    219         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    220         { "r", 0x333333 },
    221         { "s2345", 0x4444444 },
    222         { "t234567890", 0x77777777 },
    223         { "z", (int32_t)0x80000001 }
    224     };
    225     checkData(data, UPRV_LENGTHOF(data));
    226 }
    227 
    228 void BytesTrieTest::TestValuesForState() {
    229     // Check that saveState() and resetToState() interact properly
    230     // with next() and current().
    231     static const StringAndValue data[]={
    232         { "a", -1 },
    233         { "ab", -2 },
    234         { "abc", -3 },
    235         { "abcd", -4 },
    236         { "abcde", -5 },
    237         { "abcdef", -6 }
    238     };
    239     checkData(data, UPRV_LENGTHOF(data));
    240 }
    241 
    242 void BytesTrieTest::TestCompact() {
    243     // Duplicate trailing strings and values provide opportunities for compacting.
    244     static const StringAndValue data[]={
    245         { "+", 0 },
    246         { "+august", 8 },
    247         { "+december", 12 },
    248         { "+july", 7 },
    249         { "+june", 6 },
    250         { "+november", 11 },
    251         { "+october", 10 },
    252         { "+september", 9 },
    253         { "-", 0 },
    254         { "-august", 8 },
    255         { "-december", 12 },
    256         { "-july", 7 },
    257         { "-june", 6 },
    258         { "-november", 11 },
    259         { "-october", 10 },
    260         { "-september", 9 },
    261         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    262         // both times because each time it follows a different linear-match node.
    263         { "xjuly", 7 },
    264         { "xjune", 6 }
    265     };
    266     checkData(data, UPRV_LENGTHOF(data));
    267 }
    268 
    269 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    270     // All types of nodes leading to the same value,
    271     // for code coverage of recursive functions.
    272     // In particular, we need a lot of branches on some single level
    273     // to exercise a split-branch node.
    274     static const StringAndValue data[]={
    275         { "august", 8 },
    276         { "jan", 1 },
    277         { "jan.", 1 },
    278         { "jana", 1 },
    279         { "janbb", 1 },
    280         { "janc", 1 },
    281         { "janddd", 1 },
    282         { "janee", 1 },
    283         { "janef", 1 },
    284         { "janf", 1 },
    285         { "jangg", 1 },
    286         { "janh", 1 },
    287         { "janiiii", 1 },
    288         { "janj", 1 },
    289         { "jankk", 1 },
    290         { "jankl", 1 },
    291         { "jankmm", 1 },
    292         { "janl", 1 },
    293         { "janm", 1 },
    294         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    295         { "jano", 1 },
    296         { "janpp", 1 },
    297         { "janqqq", 1 },
    298         { "janr", 1 },
    299         { "januar", 1 },
    300         { "january", 1 },
    301         { "july", 7 },
    302         { "jun", 6 },
    303         { "jun.", 6 },
    304         { "june", 6 }
    305     };
    306     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
    307 }
    308 
    309 void BytesTrieTest::TestHasUniqueValue() {
    310     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    311     if(trie.isNull()) {
    312         return;  // buildTrie() reported an error
    313     }
    314     int32_t uniqueValue;
    315     if(trie->hasUniqueValue(uniqueValue)) {
    316         errln("unique value at root");
    317     }
    318     trie->next('j');
    319     trie->next('a');
    320     trie->next('n');
    321     // hasUniqueValue() directly after next()
    322     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    323         errln("not unique value 1 after \"jan\"");
    324     }
    325     trie->first('j');
    326     trie->next('u');
    327     if(trie->hasUniqueValue(uniqueValue)) {
    328         errln("unique value after \"ju\"");
    329     }
    330     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    331         errln("not normal value 6 after \"jun\"");
    332     }
    333     // hasUniqueValue() after getValue()
    334     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    335         errln("not unique value 6 after \"jun\"");
    336     }
    337     // hasUniqueValue() from within a linear-match node
    338     trie->first('a');
    339     trie->next('u');
    340     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    341         errln("not unique value 8 after \"au\"");
    342     }
    343 }
    344 
    345 void BytesTrieTest::TestGetNextBytes() {
    346     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    347     if(trie.isNull()) {
    348         return;  // buildTrie() reported an error
    349     }
    350     char buffer[40];
    351     CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
    352     int32_t count=trie->getNextBytes(sink);
    353     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
    354         errln("months getNextBytes()!=[aj] at root");
    355     }
    356     trie->next('j');
    357     trie->next('a');
    358     trie->next('n');
    359     // getNextBytes() directly after next()
    360     count=trie->getNextBytes(sink.Reset());
    361     buffer[count]=0;
    362     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
    363         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
    364     }
    365     // getNextBytes() after getValue()
    366     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    367     memset(buffer, 0, sizeof(buffer));
    368     count=trie->getNextBytes(sink.Reset());
    369     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
    370         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    371     }
    372     // getNextBytes() from a linear-match node
    373     trie->next('u');
    374     memset(buffer, 0, sizeof(buffer));
    375     count=trie->getNextBytes(sink.Reset());
    376     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
    377         errln("months getNextBytes()!=[a] after \"janu\"");
    378     }
    379     trie->next('a');
    380     memset(buffer, 0, sizeof(buffer));
    381     count=trie->getNextBytes(sink.Reset());
    382     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
    383         errln("months getNextBytes()!=[r] after \"janua\"");
    384     }
    385     trie->next('r');
    386     trie->next('y');
    387     // getNextBytes() after a final match
    388     count=trie->getNextBytes(sink.Reset());
    389     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
    390         errln("months getNextBytes()!=[] after \"january\"");
    391     }
    392 }
    393 
    394 void BytesTrieTest::TestIteratorFromBranch() {
    395     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    396     if(trie.isNull()) {
    397         return;  // buildTrie() reported an error
    398     }
    399     // Go to a branch node.
    400     trie->next('j');
    401     trie->next('a');
    402     trie->next('n');
    403     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    404     BytesTrie::Iterator iter(*trie, 0, errorCode);
    405     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    406         return;
    407     }
    408     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    409     // following "jan".
    410     static const StringAndValue data[]={
    411         { "", 1 },
    412         { ".", 1 },
    413         { "a", 1 },
    414         { "bb", 1 },
    415         { "c", 1 },
    416         { "ddd", 1 },
    417         { "ee", 1 },
    418         { "ef", 1 },
    419         { "f", 1 },
    420         { "gg", 1 },
    421         { "h", 1 },
    422         { "iiii", 1 },
    423         { "j", 1 },
    424         { "kk", 1 },
    425         { "kl", 1 },
    426         { "kmm", 1 },
    427         { "l", 1 },
    428         { "m", 1 },
    429         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    430         { "o", 1 },
    431         { "pp", 1 },
    432         { "qqq", 1 },
    433         { "r", 1 },
    434         { "uar", 1 },
    435         { "uary", 1 }
    436     };
    437     checkIterator(iter, data, UPRV_LENGTHOF(data));
    438     // Reset, and we should get the same result.
    439     logln("after iter.reset()");
    440     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    441 }
    442 
    443 void BytesTrieTest::TestIteratorFromLinearMatch() {
    444     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    445     if(trie.isNull()) {
    446         return;  // buildTrie() reported an error
    447     }
    448     // Go into a linear-match node.
    449     trie->next('j');
    450     trie->next('a');
    451     trie->next('n');
    452     trie->next('u');
    453     trie->next('a');
    454     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    455     BytesTrie::Iterator iter(*trie, 0, errorCode);
    456     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    457         return;
    458     }
    459     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    460     // following "janua".
    461     static const StringAndValue data[]={
    462         { "r", 1 },
    463         { "ry", 1 }
    464     };
    465     checkIterator(iter, data, UPRV_LENGTHOF(data));
    466     // Reset, and we should get the same result.
    467     logln("after iter.reset()");
    468     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    469 }
    470 
    471 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
    472     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    473     if(trie.isNull()) {
    474         return;  // buildTrie() reported an error
    475     }
    476     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    477     BytesTrie::Iterator iter(*trie, 4, errorCode);
    478     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    479         return;
    480     }
    481     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    482     // of each string, and no string duplicates from the truncation.
    483     static const StringAndValue data[]={
    484         { "augu", -1 },
    485         { "jan", 1 },
    486         { "jan.", 1 },
    487         { "jana", 1 },
    488         { "janb", -1 },
    489         { "janc", 1 },
    490         { "jand", -1 },
    491         { "jane", -1 },
    492         { "janf", 1 },
    493         { "jang", -1 },
    494         { "janh", 1 },
    495         { "jani", -1 },
    496         { "janj", 1 },
    497         { "jank", -1 },
    498         { "janl", 1 },
    499         { "janm", 1 },
    500         { "jann", -1 },
    501         { "jano", 1 },
    502         { "janp", -1 },
    503         { "janq", -1 },
    504         { "janr", 1 },
    505         { "janu", -1 },
    506         { "july", 7 },
    507         { "jun", 6 },
    508         { "jun.", 6 },
    509         { "june", 6 }
    510     };
    511     checkIterator(iter, data, UPRV_LENGTHOF(data));
    512     // Reset, and we should get the same result.
    513     logln("after iter.reset()");
    514     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    515 }
    516 
    517 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    518     static const StringAndValue data[]={
    519         { "abcdef", 10 },
    520         { "abcdepq", 200 },
    521         { "abcdeyz", 3000 }
    522     };
    523     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    524     if(trie.isNull()) {
    525         return;  // buildTrie() reported an error
    526     }
    527     // Go into a linear-match node.
    528     trie->next('a');
    529     trie->next('b');
    530     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    531     // Truncate within the linear-match node.
    532     BytesTrie::Iterator iter(*trie, 2, errorCode);
    533     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    534         return;
    535     }
    536     static const StringAndValue expected[]={
    537         { "cd", -1 }
    538     };
    539     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    540     // Reset, and we should get the same result.
    541     logln("after iter.reset()");
    542     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    543 }
    544 
    545 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    546     static const StringAndValue data[]={
    547         { "abcdef", 10 },
    548         { "abcdepq", 200 },
    549         { "abcdeyz", 3000 }
    550     };
    551     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    552     if(trie.isNull()) {
    553         return;  // buildTrie() reported an error
    554     }
    555     // Go into a linear-match node.
    556     trie->next('a');
    557     trie->next('b');
    558     trie->next('c');
    559     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    560     // Truncate after the linear-match node.
    561     BytesTrie::Iterator iter(*trie, 3, errorCode);
    562     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    563         return;
    564     }
    565     static const StringAndValue expected[]={
    566         { "def", 10 },
    567         { "dep", -1 },
    568         { "dey", -1 }
    569     };
    570     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    571     // Reset, and we should get the same result.
    572     logln("after iter.reset()");
    573     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    574 }
    575 
    576 void BytesTrieTest::TestIteratorFromBytes() {
    577     static const StringAndValue data[]={
    578         { "mm", 3 },
    579         { "mmm", 33 },
    580         { "mmnop", 333 }
    581     };
    582     builder_->clear();
    583     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
    584     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
    585         builder_->add(data[i].s, data[i].value, errorCode);
    586     }
    587     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
    588     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
    589     checkIterator(iter, data, UPRV_LENGTHOF(data));
    590 }
    591 
    592 void BytesTrieTest::TestFailedIterator() {
    593     UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
    594     BytesTrie::Iterator iter(NULL, 0, failure);
    595     StringPiece sp = iter.getString();
    596     if (!sp.empty()) {
    597         errln("failed iterator returned garbage data");
    598     }
    599 }
    600 
    601 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    602     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    603     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    604     logln("checkData(dataLength=%d, small)", (int)dataLength);
    605     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    606 }
    607 
    608 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    609     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
    610     if(trie.isNull()) {
    611         return;  // buildTrie() reported an error
    612     }
    613     checkFirst(*trie, data, dataLength);
    614     checkNext(*trie, data, dataLength);
    615     checkNextWithState(*trie, data, dataLength);
    616     checkNextString(*trie, data, dataLength);
    617     checkIterator(*trie, data, dataLength);
    618 }
    619 
    620 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    621                                     UStringTrieBuildOption buildOption) {
    622     IcuTestErrorCode errorCode(*this, "buildTrie()");
    623     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    624     int32_t index, step;
    625     if(dataLength&1) {
    626         // Odd number of items.
    627         index=dataLength/2;
    628         step=2;
    629     } else if((dataLength%3)!=0) {
    630         // Not a multiple of 3.
    631         index=dataLength/5;
    632         step=3;
    633     } else {
    634         index=dataLength-1;
    635         step=-1;
    636     }
    637     builder_->clear();
    638     for(int32_t i=0; i<dataLength; ++i) {
    639         builder_->add(data[index].s, data[index].value, errorCode);
    640         index=(index+step)%dataLength;
    641     }
    642     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
    643     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
    644     if(!errorCode.errIfFailureAndReset("add()/build()")) {
    645         builder_->add("zzz", 999, errorCode);
    646         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    647             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    648         }
    649     }
    650     logln("serialized trie size: %ld bytes\n", (long)sp.length());
    651     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
    652     if(sp.data()==sp2.data()) {
    653         errln("builder.buildStringPiece() before & after build() returned same array");
    654     }
    655     if(errorCode.isFailure()) {
    656         return NULL;
    657     }
    658     // Tries from either build() method should be identical but
    659     // BytesTrie does not implement equals().
    660     // We just return either one.
    661     if((dataLength&1)!=0) {
    662         return trie.orphan();
    663     } else {
    664         return new BytesTrie(sp2.data());
    665     }
    666 }
    667 
    668 void BytesTrieTest::checkFirst(BytesTrie &trie,
    669                                const StringAndValue data[], int32_t dataLength) {
    670     for(int32_t i=0; i<dataLength; ++i) {
    671         int c=*data[i].s;
    672         if(c==0) {
    673             continue;  // skip empty string
    674         }
    675         UStringTrieResult firstResult=trie.first(c);
    676         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    677         UStringTrieResult nextResult=trie.next(data[i].s[1]);
    678         if(firstResult!=trie.reset().next(c) ||
    679            firstResult!=trie.current() ||
    680            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    681            nextResult!=trie.next(data[i].s[1])
    682         ) {
    683             errln("trie.first(%c)!=trie.reset().next(same) for %s",
    684                   c, data[i].s);
    685         }
    686     }
    687     trie.reset();
    688 }
    689 
    690 void BytesTrieTest::checkNext(BytesTrie &trie,
    691                               const StringAndValue data[], int32_t dataLength) {
    692     BytesTrie::State state;
    693     for(int32_t i=0; i<dataLength; ++i) {
    694         int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
    695         UStringTrieResult result;
    696         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
    697             result!=trie.current()
    698         ) {
    699             errln("trie does not seem to contain %s", data[i].s);
    700         } else if(trie.getValue()!=data[i].value) {
    701             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    702                   data[i].s,
    703                   (long)trie.getValue(), (long)trie.getValue(),
    704                   (long)data[i].value, (long)data[i].value);
    705         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    706             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    707         }
    708         trie.reset();
    709         stringLength=strlen(data[i].s);
    710         result=trie.current();
    711         for(int32_t j=0; j<stringLength; ++j) {
    712             if(!USTRINGTRIE_HAS_NEXT(result)) {
    713                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    714                 break;
    715             }
    716             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    717                 trie.getValue();
    718                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    719                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    720                     break;
    721                 }
    722             }
    723             result=trie.next(data[i].s[j]);
    724             if(!USTRINGTRIE_MATCHES(result)) {
    725                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    726                 break;
    727             }
    728             if(result!=trie.current()) {
    729                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    730                 break;
    731             }
    732         }
    733         if(!USTRINGTRIE_HAS_VALUE(result)) {
    734             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    735             continue;
    736         }
    737         trie.getValue();
    738         if(result!=trie.current()) {
    739             errln("trie.current() != current()+getValue()+current() after end of %s",
    740                   data[i].s);
    741         }
    742         // Compare the final current() with whether next() can actually continue.
    743         trie.saveState(state);
    744         UBool nextContinues=FALSE;
    745         // Try all graphic characters; we only use those in test strings in this file.
    746 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    747         const int32_t minChar=0x20;
    748         const int32_t maxChar=0x7e;
    749 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    750         const int32_t minChar=0x40;
    751         const int32_t maxChar=0xfe;
    752 #else
    753         const int32_t minChar=0;
    754         const int32_t maxChar=0xff;
    755 #endif
    756         for(int32_t c=minChar; c<=maxChar; ++c) {
    757             if(trie.resetToState(state).next(c)) {
    758                 nextContinues=TRUE;
    759                 break;
    760             }
    761         }
    762         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    763             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    764                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    765         }
    766         trie.reset();
    767     }
    768 }
    769 
    770 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
    771                                        const StringAndValue data[], int32_t dataLength) {
    772     BytesTrie::State noState, state;
    773     for(int32_t i=0; i<dataLength; ++i) {
    774         if((i&1)==0) {
    775             // This should have no effect.
    776             trie.resetToState(noState);
    777         }
    778         const char *expectedString=data[i].s;
    779         int32_t stringLength=strlen(expectedString);
    780         int32_t partialLength=stringLength/3;
    781         for(int32_t j=0; j<partialLength; ++j) {
    782             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    783                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    784                 return;
    785             }
    786         }
    787         trie.saveState(state);
    788         UStringTrieResult resultAtState=trie.current();
    789         UStringTrieResult result;
    790         int32_t valueAtState=-99;
    791         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    792             valueAtState=trie.getValue();
    793         }
    794         result=trie.next(0);  // mismatch
    795         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    796             errln("trie.next(0) matched after part of %s", data[i].s);
    797         }
    798         if( resultAtState!=trie.resetToState(state).current() ||
    799             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    800         ) {
    801             errln("trie.next(part of %s) changes current()/getValue() after "
    802                   "saveState/next(0)/resetToState",
    803                   data[i].s);
    804         } else if(!USTRINGTRIE_HAS_VALUE(
    805                       result=trie.next(expectedString+partialLength,
    806                                        stringLength-partialLength)) ||
    807                   result!=trie.current()) {
    808             errln("trie.next(rest of %s) does not seem to contain %s after "
    809                   "saveState/next(0)/resetToState",
    810                   data[i].s, data[i].s);
    811         } else if(!USTRINGTRIE_HAS_VALUE(
    812                       result=trie.resetToState(state).
    813                                   next(expectedString+partialLength,
    814                                        stringLength-partialLength)) ||
    815                   result!=trie.current()) {
    816             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    817                   data[i].s);
    818         } else if(trie.getValue()!=data[i].value) {
    819             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    820                   data[i].s,
    821                   (long)trie.getValue(), (long)trie.getValue(),
    822                   (long)data[i].value, (long)data[i].value);
    823         }
    824         trie.reset();
    825     }
    826 }
    827 
    828 // next(string) is also tested in other functions,
    829 // but here we try to go partway through the string, and then beyond it.
    830 void BytesTrieTest::checkNextString(BytesTrie &trie,
    831                                     const StringAndValue data[], int32_t dataLength) {
    832     for(int32_t i=0; i<dataLength; ++i) {
    833         const char *expectedString=data[i].s;
    834         int32_t stringLength=strlen(expectedString);
    835         if(!trie.next(expectedString, stringLength/2)) {
    836             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    837             continue;
    838         }
    839         // Test that we stop properly at the end of the string.
    840         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
    841             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    842         }
    843         trie.reset();
    844     }
    845 }
    846 
    847 void BytesTrieTest::checkIterator(const BytesTrie &trie,
    848                                   const StringAndValue data[], int32_t dataLength) {
    849     IcuTestErrorCode errorCode(*this, "checkIterator()");
    850     BytesTrie::Iterator iter(trie, 0, errorCode);
    851     if(errorCode.errIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    852         return;
    853     }
    854     checkIterator(iter, data, dataLength);
    855 }
    856 
    857 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
    858                                   const StringAndValue data[], int32_t dataLength) {
    859     IcuTestErrorCode errorCode(*this, "checkIterator()");
    860     for(int32_t i=0; i<dataLength; ++i) {
    861         if(!iter.hasNext()) {
    862             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
    863             break;
    864         }
    865         UBool hasNext=iter.next(errorCode);
    866         if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
    867             break;
    868         }
    869         if(!hasNext) {
    870             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
    871             break;
    872         }
    873         if(iter.getString()!=StringPiece(data[i].s)) {
    874             errln("trie iterator next().getString()=%s but expected %s for item %d",
    875                   iter.getString().data(), data[i].s, (int)i);
    876         }
    877         if(iter.getValue()!=data[i].value) {
    878             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
    879                   (long)iter.getValue(), (long)iter.getValue(),
    880                   (long)data[i].value, (long)data[i].value,
    881                   (int)i, data[i].s);
    882         }
    883     }
    884     if(iter.hasNext()) {
    885         errln("trie iterator hasNext()=TRUE after all items");
    886     }
    887     UBool hasNext=iter.next(errorCode);
    888     errorCode.errIfFailureAndReset("trie iterator next() after all items");
    889     if(hasNext) {
    890         errln("trie iterator next()=TRUE after all items");
    891     }
    892 }
    893