Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  bytetrietest.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010nov16
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include <string.h>
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/bytestrie.h"
     19 #include "unicode/bytestriebuilder.h"
     20 #include "unicode/localpointer.h"
     21 #include "unicode/stringpiece.h"
     22 #include "intltest.h"
     23 
     24 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     25 
     26 struct StringAndValue {
     27     const char *s;
     28     int32_t value;
     29 };
     30 
     31 class BytesTrieTest : public IntlTest {
     32 public:
     33     BytesTrieTest();
     34     virtual ~BytesTrieTest();
     35 
     36     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     37     void TestBuilder();
     38     void TestEmpty();
     39     void Test_a();
     40     void Test_a_ab();
     41     void TestShortestBranch();
     42     void TestBranches();
     43     void TestLongSequence();
     44     void TestLongBranch();
     45     void TestValuesForState();
     46     void TestCompact();
     47 
     48     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     49     void TestHasUniqueValue();
     50     void TestGetNextBytes();
     51     void TestIteratorFromBranch();
     52     void TestIteratorFromLinearMatch();
     53     void TestTruncatingIteratorFromRoot();
     54     void TestTruncatingIteratorFromLinearMatchShort();
     55     void TestTruncatingIteratorFromLinearMatchLong();
     56     void TestIteratorFromBytes();
     57 
     58     void checkData(const StringAndValue data[], int32_t dataLength);
     59     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     60     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     61                          UStringTrieBuildOption buildOption);
     62     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     63     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     64     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     65     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     66     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     67     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     68 
     69 private:
     70     BytesTrieBuilder *builder_;
     71 };
     72 
     73 extern IntlTest *createBytesTrieTest() {
     74     return new BytesTrieTest();
     75 }
     76 
     77 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
     78     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
     79     builder_=new BytesTrieBuilder(errorCode);
     80 }
     81 
     82 BytesTrieTest::~BytesTrieTest() {
     83     delete builder_;
     84 }
     85 
     86 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     87     if(exec) {
     88         logln("TestSuite BytesTrieTest: ");
     89     }
     90     TESTCASE_AUTO_BEGIN;
     91     TESTCASE_AUTO(TestBuilder);
     92     TESTCASE_AUTO(TestEmpty);
     93     TESTCASE_AUTO(Test_a);
     94     TESTCASE_AUTO(Test_a_ab);
     95     TESTCASE_AUTO(TestShortestBranch);
     96     TESTCASE_AUTO(TestBranches);
     97     TESTCASE_AUTO(TestLongSequence);
     98     TESTCASE_AUTO(TestLongBranch);
     99     TESTCASE_AUTO(TestValuesForState);
    100     TESTCASE_AUTO(TestCompact);
    101     TESTCASE_AUTO(TestHasUniqueValue);
    102     TESTCASE_AUTO(TestGetNextBytes);
    103     TESTCASE_AUTO(TestIteratorFromBranch);
    104     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    105     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    106     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    107     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    108     TESTCASE_AUTO(TestIteratorFromBytes);
    109     TESTCASE_AUTO_END;
    110 }
    111 
    112 void BytesTrieTest::TestBuilder() {
    113     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    114     builder_->clear();
    115     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    116     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    117         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    118         return;
    119     }
    120     // TODO: remove .build(...) once add() checks for duplicates.
    121     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    122     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    123         errln("BytesTrieBuilder.add() did not detect duplicates");
    124         return;
    125     }
    126 }
    127 
    128 void BytesTrieTest::TestEmpty() {
    129     static const StringAndValue data[]={
    130         { "", 0 }
    131     };
    132     checkData(data, LENGTHOF(data));
    133 }
    134 
    135 void BytesTrieTest::Test_a() {
    136     static const StringAndValue data[]={
    137         { "a", 1 }
    138     };
    139     checkData(data, LENGTHOF(data));
    140 }
    141 
    142 void BytesTrieTest::Test_a_ab() {
    143     static const StringAndValue data[]={
    144         { "a", 1 },
    145         { "ab", 100 }
    146     };
    147     checkData(data, LENGTHOF(data));
    148 }
    149 
    150 void BytesTrieTest::TestShortestBranch() {
    151     static const StringAndValue data[]={
    152         { "a", 1000 },
    153         { "b", 2000 }
    154     };
    155     checkData(data, LENGTHOF(data));
    156 }
    157 
    158 void BytesTrieTest::TestBranches() {
    159     static const StringAndValue data[]={
    160         { "a", 0x10 },
    161         { "cc", 0x40 },
    162         { "e", 0x100 },
    163         { "ggg", 0x400 },
    164         { "i", 0x1000 },
    165         { "kkkk", 0x4000 },
    166         { "n", 0x10000 },
    167         { "ppppp", 0x40000 },
    168         { "r", 0x100000 },
    169         { "sss", 0x200000 },
    170         { "t", 0x400000 },
    171         { "uu", 0x800000 },
    172         { "vv", 0x7fffffff },
    173         { "zz", (int32_t)0x80000000 }
    174     };
    175     for(int32_t length=2; length<=LENGTHOF(data); ++length) {
    176         infoln("TestBranches length=%d", (int)length);
    177         checkData(data, length);
    178     }
    179 }
    180 
    181 void BytesTrieTest::TestLongSequence() {
    182     static const StringAndValue data[]={
    183         { "a", -1 },
    184         // sequence of linear-match nodes
    185         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    186         // more than 256 bytes
    187         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    188           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    189           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    190           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    191           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    192           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    193     };
    194     checkData(data, LENGTHOF(data));
    195 }
    196 
    197 void BytesTrieTest::TestLongBranch() {
    198     // Split-branch and interesting compact-integer values.
    199     static const StringAndValue data[]={
    200         { "a", -2 },
    201         { "b", -1 },
    202         { "c", 0 },
    203         { "d2", 1 },
    204         { "f", 0x3f },
    205         { "g", 0x40 },
    206         { "h", 0x41 },
    207         { "j23", 0x1900 },
    208         { "j24", 0x19ff },
    209         { "j25", 0x1a00 },
    210         { "k2", 0x1a80 },
    211         { "k3", 0x1aff },
    212         { "l234567890", 0x1b00 },
    213         { "l234567890123", 0x1b01 },
    214         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    215         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    216         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    217         { "r", 0x333333 },
    218         { "s2345", 0x4444444 },
    219         { "t234567890", 0x77777777 },
    220         { "z", (int32_t)0x80000001 }
    221     };
    222     checkData(data, LENGTHOF(data));
    223 }
    224 
    225 void BytesTrieTest::TestValuesForState() {
    226     // Check that saveState() and resetToState() interact properly
    227     // with next() and current().
    228     static const StringAndValue data[]={
    229         { "a", -1 },
    230         { "ab", -2 },
    231         { "abc", -3 },
    232         { "abcd", -4 },
    233         { "abcde", -5 },
    234         { "abcdef", -6 }
    235     };
    236     checkData(data, LENGTHOF(data));
    237 }
    238 
    239 void BytesTrieTest::TestCompact() {
    240     // Duplicate trailing strings and values provide opportunities for compacting.
    241     static const StringAndValue data[]={
    242         { "+", 0 },
    243         { "+august", 8 },
    244         { "+december", 12 },
    245         { "+july", 7 },
    246         { "+june", 6 },
    247         { "+november", 11 },
    248         { "+october", 10 },
    249         { "+september", 9 },
    250         { "-", 0 },
    251         { "-august", 8 },
    252         { "-december", 12 },
    253         { "-july", 7 },
    254         { "-june", 6 },
    255         { "-november", 11 },
    256         { "-october", 10 },
    257         { "-september", 9 },
    258         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    259         // both times because each time it follows a different linear-match node.
    260         { "xjuly", 7 },
    261         { "xjune", 6 }
    262     };
    263     checkData(data, LENGTHOF(data));
    264 }
    265 
    266 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    267     // All types of nodes leading to the same value,
    268     // for code coverage of recursive functions.
    269     // In particular, we need a lot of branches on some single level
    270     // to exercise a split-branch node.
    271     static const StringAndValue data[]={
    272         { "august", 8 },
    273         { "jan", 1 },
    274         { "jan.", 1 },
    275         { "jana", 1 },
    276         { "janbb", 1 },
    277         { "janc", 1 },
    278         { "janddd", 1 },
    279         { "janee", 1 },
    280         { "janef", 1 },
    281         { "janf", 1 },
    282         { "jangg", 1 },
    283         { "janh", 1 },
    284         { "janiiii", 1 },
    285         { "janj", 1 },
    286         { "jankk", 1 },
    287         { "jankl", 1 },
    288         { "jankmm", 1 },
    289         { "janl", 1 },
    290         { "janm", 1 },
    291         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    292         { "jano", 1 },
    293         { "janpp", 1 },
    294         { "janqqq", 1 },
    295         { "janr", 1 },
    296         { "januar", 1 },
    297         { "january", 1 },
    298         { "july", 7 },
    299         { "jun", 6 },
    300         { "jun.", 6 },
    301         { "june", 6 }
    302     };
    303     return buildTrie(data, LENGTHOF(data), buildOption);
    304 }
    305 
    306 void BytesTrieTest::TestHasUniqueValue() {
    307     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    308     if(trie.isNull()) {
    309         return;  // buildTrie() reported an error
    310     }
    311     int32_t uniqueValue;
    312     if(trie->hasUniqueValue(uniqueValue)) {
    313         errln("unique value at root");
    314     }
    315     trie->next('j');
    316     trie->next('a');
    317     trie->next('n');
    318     // hasUniqueValue() directly after next()
    319     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    320         errln("not unique value 1 after \"jan\"");
    321     }
    322     trie->first('j');
    323     trie->next('u');
    324     if(trie->hasUniqueValue(uniqueValue)) {
    325         errln("unique value after \"ju\"");
    326     }
    327     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    328         errln("not normal value 6 after \"jun\"");
    329     }
    330     // hasUniqueValue() after getValue()
    331     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    332         errln("not unique value 6 after \"jun\"");
    333     }
    334     // hasUniqueValue() from within a linear-match node
    335     trie->first('a');
    336     trie->next('u');
    337     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    338         errln("not unique value 8 after \"au\"");
    339     }
    340 }
    341 
    342 void BytesTrieTest::TestGetNextBytes() {
    343     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    344     if(trie.isNull()) {
    345         return;  // buildTrie() reported an error
    346     }
    347     char buffer[40];
    348     CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
    349     int32_t count=trie->getNextBytes(sink);
    350     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
    351         errln("months getNextBytes()!=[aj] at root");
    352     }
    353     trie->next('j');
    354     trie->next('a');
    355     trie->next('n');
    356     // getNextBytes() directly after next()
    357     count=trie->getNextBytes(sink.Reset());
    358     buffer[count]=0;
    359     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
    360         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
    361     }
    362     // getNextBytes() after getValue()
    363     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    364     memset(buffer, 0, sizeof(buffer));
    365     count=trie->getNextBytes(sink.Reset());
    366     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
    367         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    368     }
    369     // getNextBytes() from a linear-match node
    370     trie->next('u');
    371     memset(buffer, 0, sizeof(buffer));
    372     count=trie->getNextBytes(sink.Reset());
    373     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
    374         errln("months getNextBytes()!=[a] after \"janu\"");
    375     }
    376     trie->next('a');
    377     memset(buffer, 0, sizeof(buffer));
    378     count=trie->getNextBytes(sink.Reset());
    379     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
    380         errln("months getNextBytes()!=[r] after \"janua\"");
    381     }
    382     trie->next('r');
    383     trie->next('y');
    384     // getNextBytes() after a final match
    385     count=trie->getNextBytes(sink.Reset());
    386     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
    387         errln("months getNextBytes()!=[] after \"january\"");
    388     }
    389 }
    390 
    391 void BytesTrieTest::TestIteratorFromBranch() {
    392     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    393     if(trie.isNull()) {
    394         return;  // buildTrie() reported an error
    395     }
    396     // Go to a branch node.
    397     trie->next('j');
    398     trie->next('a');
    399     trie->next('n');
    400     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    401     BytesTrie::Iterator iter(*trie, 0, errorCode);
    402     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    403         return;
    404     }
    405     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    406     // following "jan".
    407     static const StringAndValue data[]={
    408         { "", 1 },
    409         { ".", 1 },
    410         { "a", 1 },
    411         { "bb", 1 },
    412         { "c", 1 },
    413         { "ddd", 1 },
    414         { "ee", 1 },
    415         { "ef", 1 },
    416         { "f", 1 },
    417         { "gg", 1 },
    418         { "h", 1 },
    419         { "iiii", 1 },
    420         { "j", 1 },
    421         { "kk", 1 },
    422         { "kl", 1 },
    423         { "kmm", 1 },
    424         { "l", 1 },
    425         { "m", 1 },
    426         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    427         { "o", 1 },
    428         { "pp", 1 },
    429         { "qqq", 1 },
    430         { "r", 1 },
    431         { "uar", 1 },
    432         { "uary", 1 }
    433     };
    434     checkIterator(iter, data, LENGTHOF(data));
    435     // Reset, and we should get the same result.
    436     logln("after iter.reset()");
    437     checkIterator(iter.reset(), data, LENGTHOF(data));
    438 }
    439 
    440 void BytesTrieTest::TestIteratorFromLinearMatch() {
    441     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    442     if(trie.isNull()) {
    443         return;  // buildTrie() reported an error
    444     }
    445     // Go into a linear-match node.
    446     trie->next('j');
    447     trie->next('a');
    448     trie->next('n');
    449     trie->next('u');
    450     trie->next('a');
    451     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    452     BytesTrie::Iterator iter(*trie, 0, errorCode);
    453     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    454         return;
    455     }
    456     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    457     // following "janua".
    458     static const StringAndValue data[]={
    459         { "r", 1 },
    460         { "ry", 1 }
    461     };
    462     checkIterator(iter, data, LENGTHOF(data));
    463     // Reset, and we should get the same result.
    464     logln("after iter.reset()");
    465     checkIterator(iter.reset(), data, LENGTHOF(data));
    466 }
    467 
    468 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
    469     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    470     if(trie.isNull()) {
    471         return;  // buildTrie() reported an error
    472     }
    473     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    474     BytesTrie::Iterator iter(*trie, 4, errorCode);
    475     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    476         return;
    477     }
    478     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    479     // of each string, and no string duplicates from the truncation.
    480     static const StringAndValue data[]={
    481         { "augu", -1 },
    482         { "jan", 1 },
    483         { "jan.", 1 },
    484         { "jana", 1 },
    485         { "janb", -1 },
    486         { "janc", 1 },
    487         { "jand", -1 },
    488         { "jane", -1 },
    489         { "janf", 1 },
    490         { "jang", -1 },
    491         { "janh", 1 },
    492         { "jani", -1 },
    493         { "janj", 1 },
    494         { "jank", -1 },
    495         { "janl", 1 },
    496         { "janm", 1 },
    497         { "jann", -1 },
    498         { "jano", 1 },
    499         { "janp", -1 },
    500         { "janq", -1 },
    501         { "janr", 1 },
    502         { "janu", -1 },
    503         { "july", 7 },
    504         { "jun", 6 },
    505         { "jun.", 6 },
    506         { "june", 6 }
    507     };
    508     checkIterator(iter, data, LENGTHOF(data));
    509     // Reset, and we should get the same result.
    510     logln("after iter.reset()");
    511     checkIterator(iter.reset(), data, LENGTHOF(data));
    512 }
    513 
    514 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    515     static const StringAndValue data[]={
    516         { "abcdef", 10 },
    517         { "abcdepq", 200 },
    518         { "abcdeyz", 3000 }
    519     };
    520     LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    521     if(trie.isNull()) {
    522         return;  // buildTrie() reported an error
    523     }
    524     // Go into a linear-match node.
    525     trie->next('a');
    526     trie->next('b');
    527     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    528     // Truncate within the linear-match node.
    529     BytesTrie::Iterator iter(*trie, 2, errorCode);
    530     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    531         return;
    532     }
    533     static const StringAndValue expected[]={
    534         { "cd", -1 }
    535     };
    536     checkIterator(iter, expected, LENGTHOF(expected));
    537     // Reset, and we should get the same result.
    538     logln("after iter.reset()");
    539     checkIterator(iter.reset(), expected, LENGTHOF(expected));
    540 }
    541 
    542 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    543     static const StringAndValue data[]={
    544         { "abcdef", 10 },
    545         { "abcdepq", 200 },
    546         { "abcdeyz", 3000 }
    547     };
    548     LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    549     if(trie.isNull()) {
    550         return;  // buildTrie() reported an error
    551     }
    552     // Go into a linear-match node.
    553     trie->next('a');
    554     trie->next('b');
    555     trie->next('c');
    556     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    557     // Truncate after the linear-match node.
    558     BytesTrie::Iterator iter(*trie, 3, errorCode);
    559     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    560         return;
    561     }
    562     static const StringAndValue expected[]={
    563         { "def", 10 },
    564         { "dep", -1 },
    565         { "dey", -1 }
    566     };
    567     checkIterator(iter, expected, LENGTHOF(expected));
    568     // Reset, and we should get the same result.
    569     logln("after iter.reset()");
    570     checkIterator(iter.reset(), expected, LENGTHOF(expected));
    571 }
    572 
    573 void BytesTrieTest::TestIteratorFromBytes() {
    574     static const StringAndValue data[]={
    575         { "mm", 3 },
    576         { "mmm", 33 },
    577         { "mmnop", 333 }
    578     };
    579     builder_->clear();
    580     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
    581     for(int32_t i=0; i<LENGTHOF(data); ++i) {
    582         builder_->add(data[i].s, data[i].value, errorCode);
    583     }
    584     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
    585     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
    586     checkIterator(iter, data, LENGTHOF(data));
    587 }
    588 
    589 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    590     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    591     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    592     logln("checkData(dataLength=%d, small)", (int)dataLength);
    593     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    594 }
    595 
    596 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    597     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
    598     if(trie.isNull()) {
    599         return;  // buildTrie() reported an error
    600     }
    601     checkFirst(*trie, data, dataLength);
    602     checkNext(*trie, data, dataLength);
    603     checkNextWithState(*trie, data, dataLength);
    604     checkNextString(*trie, data, dataLength);
    605     checkIterator(*trie, data, dataLength);
    606 }
    607 
    608 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    609                                     UStringTrieBuildOption buildOption) {
    610     IcuTestErrorCode errorCode(*this, "buildTrie()");
    611     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    612     int32_t index, step;
    613     if(dataLength&1) {
    614         // Odd number of items.
    615         index=dataLength/2;
    616         step=2;
    617     } else if((dataLength%3)!=0) {
    618         // Not a multiple of 3.
    619         index=dataLength/5;
    620         step=3;
    621     } else {
    622         index=dataLength-1;
    623         step=-1;
    624     }
    625     builder_->clear();
    626     for(int32_t i=0; i<dataLength; ++i) {
    627         builder_->add(data[index].s, data[index].value, errorCode);
    628         index=(index+step)%dataLength;
    629     }
    630     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
    631     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
    632     if(!errorCode.logIfFailureAndReset("add()/build()")) {
    633         builder_->add("zzz", 999, errorCode);
    634         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    635             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    636         }
    637     }
    638     logln("serialized trie size: %ld bytes\n", (long)sp.length());
    639     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
    640     if(sp.data()==sp2.data()) {
    641         errln("builder.buildStringPiece() before & after build() returned same array");
    642     }
    643     if(errorCode.isFailure()) {
    644         return NULL;
    645     }
    646     // Tries from either build() method should be identical but
    647     // BytesTrie does not implement equals().
    648     // We just return either one.
    649     if((dataLength&1)!=0) {
    650         return trie.orphan();
    651     } else {
    652         return new BytesTrie(sp2.data());
    653     }
    654 }
    655 
    656 void BytesTrieTest::checkFirst(BytesTrie &trie,
    657                                const StringAndValue data[], int32_t dataLength) {
    658     for(int32_t i=0; i<dataLength; ++i) {
    659         int c=*data[i].s;
    660         if(c==0) {
    661             continue;  // skip empty string
    662         }
    663         UStringTrieResult firstResult=trie.first(c);
    664         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    665         UStringTrieResult nextResult=trie.next(data[i].s[1]);
    666         if(firstResult!=trie.reset().next(c) ||
    667            firstResult!=trie.current() ||
    668            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    669            nextResult!=trie.next(data[i].s[1])
    670         ) {
    671             errln("trie.first(%c)!=trie.reset().next(same) for %s",
    672                   c, data[i].s);
    673         }
    674     }
    675     trie.reset();
    676 }
    677 
    678 void BytesTrieTest::checkNext(BytesTrie &trie,
    679                               const StringAndValue data[], int32_t dataLength) {
    680     BytesTrie::State state;
    681     for(int32_t i=0; i<dataLength; ++i) {
    682         int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
    683         UStringTrieResult result;
    684         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
    685             result!=trie.current()
    686         ) {
    687             errln("trie does not seem to contain %s", data[i].s);
    688         } else if(trie.getValue()!=data[i].value) {
    689             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    690                   data[i].s,
    691                   (long)trie.getValue(), (long)trie.getValue(),
    692                   (long)data[i].value, (long)data[i].value);
    693         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    694             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    695         }
    696         trie.reset();
    697         stringLength=strlen(data[i].s);
    698         result=trie.current();
    699         for(int32_t j=0; j<stringLength; ++j) {
    700             if(!USTRINGTRIE_HAS_NEXT(result)) {
    701                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    702                 break;
    703             }
    704             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    705                 trie.getValue();
    706                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    707                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    708                     break;
    709                 }
    710             }
    711             result=trie.next(data[i].s[j]);
    712             if(!USTRINGTRIE_MATCHES(result)) {
    713                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    714                 break;
    715             }
    716             if(result!=trie.current()) {
    717                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    718                 break;
    719             }
    720         }
    721         if(!USTRINGTRIE_HAS_VALUE(result)) {
    722             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    723             continue;
    724         }
    725         trie.getValue();
    726         if(result!=trie.current()) {
    727             errln("trie.current() != current()+getValue()+current() after end of %s",
    728                   data[i].s);
    729         }
    730         // Compare the final current() with whether next() can actually continue.
    731         trie.saveState(state);
    732         UBool nextContinues=FALSE;
    733         // Try all graphic characters; we only use those in test strings in this file.
    734 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    735         const int32_t minChar=0x20;
    736         const int32_t maxChar=0x7e;
    737 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    738         const int32_t minChar=0x40;
    739         const int32_t maxChar=0xfe;
    740 #else
    741         const int32_t minChar=0;
    742         const int32_t maxChar=0xff;
    743 #endif
    744         for(int32_t c=minChar; c<=maxChar; ++c) {
    745             if(trie.resetToState(state).next(c)) {
    746                 nextContinues=TRUE;
    747                 break;
    748             }
    749         }
    750         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    751             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    752                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    753         }
    754         trie.reset();
    755     }
    756 }
    757 
    758 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
    759                                        const StringAndValue data[], int32_t dataLength) {
    760     BytesTrie::State noState, state;
    761     for(int32_t i=0; i<dataLength; ++i) {
    762         if((i&1)==0) {
    763             // This should have no effect.
    764             trie.resetToState(noState);
    765         }
    766         const char *expectedString=data[i].s;
    767         int32_t stringLength=strlen(expectedString);
    768         int32_t partialLength=stringLength/3;
    769         for(int32_t j=0; j<partialLength; ++j) {
    770             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    771                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    772                 return;
    773             }
    774         }
    775         trie.saveState(state);
    776         UStringTrieResult resultAtState=trie.current();
    777         UStringTrieResult result;
    778         int32_t valueAtState=-99;
    779         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    780             valueAtState=trie.getValue();
    781         }
    782         result=trie.next(0);  // mismatch
    783         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    784             errln("trie.next(0) matched after part of %s", data[i].s);
    785         }
    786         if( resultAtState!=trie.resetToState(state).current() ||
    787             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    788         ) {
    789             errln("trie.next(part of %s) changes current()/getValue() after "
    790                   "saveState/next(0)/resetToState",
    791                   data[i].s);
    792         } else if(!USTRINGTRIE_HAS_VALUE(
    793                       result=trie.next(expectedString+partialLength,
    794                                        stringLength-partialLength)) ||
    795                   result!=trie.current()) {
    796             errln("trie.next(rest of %s) does not seem to contain %s after "
    797                   "saveState/next(0)/resetToState",
    798                   data[i].s, data[i].s);
    799         } else if(!USTRINGTRIE_HAS_VALUE(
    800                       result=trie.resetToState(state).
    801                                   next(expectedString+partialLength,
    802                                        stringLength-partialLength)) ||
    803                   result!=trie.current()) {
    804             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    805                   data[i].s);
    806         } else if(trie.getValue()!=data[i].value) {
    807             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    808                   data[i].s,
    809                   (long)trie.getValue(), (long)trie.getValue(),
    810                   (long)data[i].value, (long)data[i].value);
    811         }
    812         trie.reset();
    813     }
    814 }
    815 
    816 // next(string) is also tested in other functions,
    817 // but here we try to go partway through the string, and then beyond it.
    818 void BytesTrieTest::checkNextString(BytesTrie &trie,
    819                                     const StringAndValue data[], int32_t dataLength) {
    820     for(int32_t i=0; i<dataLength; ++i) {
    821         const char *expectedString=data[i].s;
    822         int32_t stringLength=strlen(expectedString);
    823         if(!trie.next(expectedString, stringLength/2)) {
    824             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    825             continue;
    826         }
    827         // Test that we stop properly at the end of the string.
    828         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
    829             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    830         }
    831         trie.reset();
    832     }
    833 }
    834 
    835 void BytesTrieTest::checkIterator(const BytesTrie &trie,
    836                                   const StringAndValue data[], int32_t dataLength) {
    837     IcuTestErrorCode errorCode(*this, "checkIterator()");
    838     BytesTrie::Iterator iter(trie, 0, errorCode);
    839     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    840         return;
    841     }
    842     checkIterator(iter, data, dataLength);
    843 }
    844 
    845 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
    846                                   const StringAndValue data[], int32_t dataLength) {
    847     IcuTestErrorCode errorCode(*this, "checkIterator()");
    848     for(int32_t i=0; i<dataLength; ++i) {
    849         if(!iter.hasNext()) {
    850             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
    851             break;
    852         }
    853         UBool hasNext=iter.next(errorCode);
    854         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
    855             break;
    856         }
    857         if(!hasNext) {
    858             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
    859             break;
    860         }
    861         if(iter.getString()!=StringPiece(data[i].s)) {
    862             errln("trie iterator next().getString()=%s but expected %s for item %d",
    863                   iter.getString().data(), data[i].s, (int)i);
    864         }
    865         if(iter.getValue()!=data[i].value) {
    866             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
    867                   (long)iter.getValue(), (long)iter.getValue(),
    868                   (long)data[i].value, (long)data[i].value,
    869                   (int)i, data[i].s);
    870         }
    871     }
    872     if(iter.hasNext()) {
    873         errln("trie iterator hasNext()=TRUE after all items");
    874     }
    875     UBool hasNext=iter.next(errorCode);
    876     errorCode.logIfFailureAndReset("trie iterator next() after all items");
    877     if(hasNext) {
    878         errln("trie iterator next()=TRUE after all items");
    879     }
    880 }
    881