Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  bytetrietest.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010nov16
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include <string.h>
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/bytestrie.h"
     19 #include "unicode/bytestriebuilder.h"
     20 #include "unicode/localpointer.h"
     21 #include "unicode/stringpiece.h"
     22 #include "intltest.h"
     23 #include "cmemory.h"
     24 
     25 struct StringAndValue {
     26     const char *s;
     27     int32_t value;
     28 };
     29 
     30 class BytesTrieTest : public IntlTest {
     31 public:
     32     BytesTrieTest();
     33     virtual ~BytesTrieTest();
     34 
     35     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     36     void TestBuilder();
     37     void TestEmpty();
     38     void Test_a();
     39     void Test_a_ab();
     40     void TestShortestBranch();
     41     void TestBranches();
     42     void TestLongSequence();
     43     void TestLongBranch();
     44     void TestValuesForState();
     45     void TestCompact();
     46 
     47     BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     48     void TestHasUniqueValue();
     49     void TestGetNextBytes();
     50     void TestIteratorFromBranch();
     51     void TestIteratorFromLinearMatch();
     52     void TestTruncatingIteratorFromRoot();
     53     void TestTruncatingIteratorFromLinearMatchShort();
     54     void TestTruncatingIteratorFromLinearMatchLong();
     55     void TestIteratorFromBytes();
     56 
     57     void checkData(const StringAndValue data[], int32_t dataLength);
     58     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     59     BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     60                          UStringTrieBuildOption buildOption);
     61     void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     62     void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     63     void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     64     void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     65     void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
     66     void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     67 
     68 private:
     69     BytesTrieBuilder *builder_;
     70 };
     71 
     72 extern IntlTest *createBytesTrieTest() {
     73     return new BytesTrieTest();
     74 }
     75 
     76 BytesTrieTest::BytesTrieTest() : builder_(NULL) {
     77     IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
     78     builder_=new BytesTrieBuilder(errorCode);
     79 }
     80 
     81 BytesTrieTest::~BytesTrieTest() {
     82     delete builder_;
     83 }
     84 
     85 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     86     if(exec) {
     87         logln("TestSuite BytesTrieTest: ");
     88     }
     89     TESTCASE_AUTO_BEGIN;
     90     TESTCASE_AUTO(TestBuilder);
     91     TESTCASE_AUTO(TestEmpty);
     92     TESTCASE_AUTO(Test_a);
     93     TESTCASE_AUTO(Test_a_ab);
     94     TESTCASE_AUTO(TestShortestBranch);
     95     TESTCASE_AUTO(TestBranches);
     96     TESTCASE_AUTO(TestLongSequence);
     97     TESTCASE_AUTO(TestLongBranch);
     98     TESTCASE_AUTO(TestValuesForState);
     99     TESTCASE_AUTO(TestCompact);
    100     TESTCASE_AUTO(TestHasUniqueValue);
    101     TESTCASE_AUTO(TestGetNextBytes);
    102     TESTCASE_AUTO(TestIteratorFromBranch);
    103     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    104     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    105     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    106     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    107     TESTCASE_AUTO(TestIteratorFromBytes);
    108     TESTCASE_AUTO_END;
    109 }
    110 
    111 void BytesTrieTest::TestBuilder() {
    112     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    113     builder_->clear();
    114     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    115     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    116         errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    117         return;
    118     }
    119     // TODO: remove .build(...) once add() checks for duplicates.
    120     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    121     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    122         errln("BytesTrieBuilder.add() did not detect duplicates");
    123         return;
    124     }
    125 }
    126 
    127 void BytesTrieTest::TestEmpty() {
    128     static const StringAndValue data[]={
    129         { "", 0 }
    130     };
    131     checkData(data, UPRV_LENGTHOF(data));
    132 }
    133 
    134 void BytesTrieTest::Test_a() {
    135     static const StringAndValue data[]={
    136         { "a", 1 }
    137     };
    138     checkData(data, UPRV_LENGTHOF(data));
    139 }
    140 
    141 void BytesTrieTest::Test_a_ab() {
    142     static const StringAndValue data[]={
    143         { "a", 1 },
    144         { "ab", 100 }
    145     };
    146     checkData(data, UPRV_LENGTHOF(data));
    147 }
    148 
    149 void BytesTrieTest::TestShortestBranch() {
    150     static const StringAndValue data[]={
    151         { "a", 1000 },
    152         { "b", 2000 }
    153     };
    154     checkData(data, UPRV_LENGTHOF(data));
    155 }
    156 
    157 void BytesTrieTest::TestBranches() {
    158     static const StringAndValue data[]={
    159         { "a", 0x10 },
    160         { "cc", 0x40 },
    161         { "e", 0x100 },
    162         { "ggg", 0x400 },
    163         { "i", 0x1000 },
    164         { "kkkk", 0x4000 },
    165         { "n", 0x10000 },
    166         { "ppppp", 0x40000 },
    167         { "r", 0x100000 },
    168         { "sss", 0x200000 },
    169         { "t", 0x400000 },
    170         { "uu", 0x800000 },
    171         { "vv", 0x7fffffff },
    172         { "zz", (int32_t)0x80000000 }
    173     };
    174     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
    175         logln("TestBranches length=%d", (int)length);
    176         checkData(data, length);
    177     }
    178 }
    179 
    180 void BytesTrieTest::TestLongSequence() {
    181     static const StringAndValue data[]={
    182         { "a", -1 },
    183         // sequence of linear-match nodes
    184         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    185         // more than 256 bytes
    186         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    187           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    188           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    189           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    190           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    191           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    192     };
    193     checkData(data, UPRV_LENGTHOF(data));
    194 }
    195 
    196 void BytesTrieTest::TestLongBranch() {
    197     // Split-branch and interesting compact-integer values.
    198     static const StringAndValue data[]={
    199         { "a", -2 },
    200         { "b", -1 },
    201         { "c", 0 },
    202         { "d2", 1 },
    203         { "f", 0x3f },
    204         { "g", 0x40 },
    205         { "h", 0x41 },
    206         { "j23", 0x1900 },
    207         { "j24", 0x19ff },
    208         { "j25", 0x1a00 },
    209         { "k2", 0x1a80 },
    210         { "k3", 0x1aff },
    211         { "l234567890", 0x1b00 },
    212         { "l234567890123", 0x1b01 },
    213         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    214         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    215         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    216         { "r", 0x333333 },
    217         { "s2345", 0x4444444 },
    218         { "t234567890", 0x77777777 },
    219         { "z", (int32_t)0x80000001 }
    220     };
    221     checkData(data, UPRV_LENGTHOF(data));
    222 }
    223 
    224 void BytesTrieTest::TestValuesForState() {
    225     // Check that saveState() and resetToState() interact properly
    226     // with next() and current().
    227     static const StringAndValue data[]={
    228         { "a", -1 },
    229         { "ab", -2 },
    230         { "abc", -3 },
    231         { "abcd", -4 },
    232         { "abcde", -5 },
    233         { "abcdef", -6 }
    234     };
    235     checkData(data, UPRV_LENGTHOF(data));
    236 }
    237 
    238 void BytesTrieTest::TestCompact() {
    239     // Duplicate trailing strings and values provide opportunities for compacting.
    240     static const StringAndValue data[]={
    241         { "+", 0 },
    242         { "+august", 8 },
    243         { "+december", 12 },
    244         { "+july", 7 },
    245         { "+june", 6 },
    246         { "+november", 11 },
    247         { "+october", 10 },
    248         { "+september", 9 },
    249         { "-", 0 },
    250         { "-august", 8 },
    251         { "-december", 12 },
    252         { "-july", 7 },
    253         { "-june", 6 },
    254         { "-november", 11 },
    255         { "-october", 10 },
    256         { "-september", 9 },
    257         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    258         // both times because each time it follows a different linear-match node.
    259         { "xjuly", 7 },
    260         { "xjune", 6 }
    261     };
    262     checkData(data, UPRV_LENGTHOF(data));
    263 }
    264 
    265 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    266     // All types of nodes leading to the same value,
    267     // for code coverage of recursive functions.
    268     // In particular, we need a lot of branches on some single level
    269     // to exercise a split-branch node.
    270     static const StringAndValue data[]={
    271         { "august", 8 },
    272         { "jan", 1 },
    273         { "jan.", 1 },
    274         { "jana", 1 },
    275         { "janbb", 1 },
    276         { "janc", 1 },
    277         { "janddd", 1 },
    278         { "janee", 1 },
    279         { "janef", 1 },
    280         { "janf", 1 },
    281         { "jangg", 1 },
    282         { "janh", 1 },
    283         { "janiiii", 1 },
    284         { "janj", 1 },
    285         { "jankk", 1 },
    286         { "jankl", 1 },
    287         { "jankmm", 1 },
    288         { "janl", 1 },
    289         { "janm", 1 },
    290         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    291         { "jano", 1 },
    292         { "janpp", 1 },
    293         { "janqqq", 1 },
    294         { "janr", 1 },
    295         { "januar", 1 },
    296         { "january", 1 },
    297         { "july", 7 },
    298         { "jun", 6 },
    299         { "jun.", 6 },
    300         { "june", 6 }
    301     };
    302     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
    303 }
    304 
    305 void BytesTrieTest::TestHasUniqueValue() {
    306     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    307     if(trie.isNull()) {
    308         return;  // buildTrie() reported an error
    309     }
    310     int32_t uniqueValue;
    311     if(trie->hasUniqueValue(uniqueValue)) {
    312         errln("unique value at root");
    313     }
    314     trie->next('j');
    315     trie->next('a');
    316     trie->next('n');
    317     // hasUniqueValue() directly after next()
    318     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    319         errln("not unique value 1 after \"jan\"");
    320     }
    321     trie->first('j');
    322     trie->next('u');
    323     if(trie->hasUniqueValue(uniqueValue)) {
    324         errln("unique value after \"ju\"");
    325     }
    326     if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    327         errln("not normal value 6 after \"jun\"");
    328     }
    329     // hasUniqueValue() after getValue()
    330     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    331         errln("not unique value 6 after \"jun\"");
    332     }
    333     // hasUniqueValue() from within a linear-match node
    334     trie->first('a');
    335     trie->next('u');
    336     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    337         errln("not unique value 8 after \"au\"");
    338     }
    339 }
    340 
    341 void BytesTrieTest::TestGetNextBytes() {
    342     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    343     if(trie.isNull()) {
    344         return;  // buildTrie() reported an error
    345     }
    346     char buffer[40];
    347     CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
    348     int32_t count=trie->getNextBytes(sink);
    349     if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
    350         errln("months getNextBytes()!=[aj] at root");
    351     }
    352     trie->next('j');
    353     trie->next('a');
    354     trie->next('n');
    355     // getNextBytes() directly after next()
    356     count=trie->getNextBytes(sink.Reset());
    357     buffer[count]=0;
    358     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
    359         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
    360     }
    361     // getNextBytes() after getValue()
    362     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    363     memset(buffer, 0, sizeof(buffer));
    364     count=trie->getNextBytes(sink.Reset());
    365     if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
    366         errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    367     }
    368     // getNextBytes() from a linear-match node
    369     trie->next('u');
    370     memset(buffer, 0, sizeof(buffer));
    371     count=trie->getNextBytes(sink.Reset());
    372     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
    373         errln("months getNextBytes()!=[a] after \"janu\"");
    374     }
    375     trie->next('a');
    376     memset(buffer, 0, sizeof(buffer));
    377     count=trie->getNextBytes(sink.Reset());
    378     if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
    379         errln("months getNextBytes()!=[r] after \"janua\"");
    380     }
    381     trie->next('r');
    382     trie->next('y');
    383     // getNextBytes() after a final match
    384     count=trie->getNextBytes(sink.Reset());
    385     if(count!=0 || sink.NumberOfBytesAppended()!=0) {
    386         errln("months getNextBytes()!=[] after \"january\"");
    387     }
    388 }
    389 
    390 void BytesTrieTest::TestIteratorFromBranch() {
    391     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    392     if(trie.isNull()) {
    393         return;  // buildTrie() reported an error
    394     }
    395     // Go to a branch node.
    396     trie->next('j');
    397     trie->next('a');
    398     trie->next('n');
    399     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    400     BytesTrie::Iterator iter(*trie, 0, errorCode);
    401     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    402         return;
    403     }
    404     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    405     // following "jan".
    406     static const StringAndValue data[]={
    407         { "", 1 },
    408         { ".", 1 },
    409         { "a", 1 },
    410         { "bb", 1 },
    411         { "c", 1 },
    412         { "ddd", 1 },
    413         { "ee", 1 },
    414         { "ef", 1 },
    415         { "f", 1 },
    416         { "gg", 1 },
    417         { "h", 1 },
    418         { "iiii", 1 },
    419         { "j", 1 },
    420         { "kk", 1 },
    421         { "kl", 1 },
    422         { "kmm", 1 },
    423         { "l", 1 },
    424         { "m", 1 },
    425         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    426         { "o", 1 },
    427         { "pp", 1 },
    428         { "qqq", 1 },
    429         { "r", 1 },
    430         { "uar", 1 },
    431         { "uary", 1 }
    432     };
    433     checkIterator(iter, data, UPRV_LENGTHOF(data));
    434     // Reset, and we should get the same result.
    435     logln("after iter.reset()");
    436     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    437 }
    438 
    439 void BytesTrieTest::TestIteratorFromLinearMatch() {
    440     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    441     if(trie.isNull()) {
    442         return;  // buildTrie() reported an error
    443     }
    444     // Go into a linear-match node.
    445     trie->next('j');
    446     trie->next('a');
    447     trie->next('n');
    448     trie->next('u');
    449     trie->next('a');
    450     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    451     BytesTrie::Iterator iter(*trie, 0, errorCode);
    452     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    453         return;
    454     }
    455     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    456     // following "janua".
    457     static const StringAndValue data[]={
    458         { "r", 1 },
    459         { "ry", 1 }
    460     };
    461     checkIterator(iter, data, UPRV_LENGTHOF(data));
    462     // Reset, and we should get the same result.
    463     logln("after iter.reset()");
    464     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    465 }
    466 
    467 void BytesTrieTest::TestTruncatingIteratorFromRoot() {
    468     LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    469     if(trie.isNull()) {
    470         return;  // buildTrie() reported an error
    471     }
    472     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    473     BytesTrie::Iterator iter(*trie, 4, errorCode);
    474     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    475         return;
    476     }
    477     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    478     // of each string, and no string duplicates from the truncation.
    479     static const StringAndValue data[]={
    480         { "augu", -1 },
    481         { "jan", 1 },
    482         { "jan.", 1 },
    483         { "jana", 1 },
    484         { "janb", -1 },
    485         { "janc", 1 },
    486         { "jand", -1 },
    487         { "jane", -1 },
    488         { "janf", 1 },
    489         { "jang", -1 },
    490         { "janh", 1 },
    491         { "jani", -1 },
    492         { "janj", 1 },
    493         { "jank", -1 },
    494         { "janl", 1 },
    495         { "janm", 1 },
    496         { "jann", -1 },
    497         { "jano", 1 },
    498         { "janp", -1 },
    499         { "janq", -1 },
    500         { "janr", 1 },
    501         { "janu", -1 },
    502         { "july", 7 },
    503         { "jun", 6 },
    504         { "jun.", 6 },
    505         { "june", 6 }
    506     };
    507     checkIterator(iter, data, UPRV_LENGTHOF(data));
    508     // Reset, and we should get the same result.
    509     logln("after iter.reset()");
    510     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    511 }
    512 
    513 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    514     static const StringAndValue data[]={
    515         { "abcdef", 10 },
    516         { "abcdepq", 200 },
    517         { "abcdeyz", 3000 }
    518     };
    519     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    520     if(trie.isNull()) {
    521         return;  // buildTrie() reported an error
    522     }
    523     // Go into a linear-match node.
    524     trie->next('a');
    525     trie->next('b');
    526     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    527     // Truncate within the linear-match node.
    528     BytesTrie::Iterator iter(*trie, 2, errorCode);
    529     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    530         return;
    531     }
    532     static const StringAndValue expected[]={
    533         { "cd", -1 }
    534     };
    535     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    536     // Reset, and we should get the same result.
    537     logln("after iter.reset()");
    538     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    539 }
    540 
    541 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    542     static const StringAndValue data[]={
    543         { "abcdef", 10 },
    544         { "abcdepq", 200 },
    545         { "abcdeyz", 3000 }
    546     };
    547     LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    548     if(trie.isNull()) {
    549         return;  // buildTrie() reported an error
    550     }
    551     // Go into a linear-match node.
    552     trie->next('a');
    553     trie->next('b');
    554     trie->next('c');
    555     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    556     // Truncate after the linear-match node.
    557     BytesTrie::Iterator iter(*trie, 3, errorCode);
    558     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    559         return;
    560     }
    561     static const StringAndValue expected[]={
    562         { "def", 10 },
    563         { "dep", -1 },
    564         { "dey", -1 }
    565     };
    566     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    567     // Reset, and we should get the same result.
    568     logln("after iter.reset()");
    569     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    570 }
    571 
    572 void BytesTrieTest::TestIteratorFromBytes() {
    573     static const StringAndValue data[]={
    574         { "mm", 3 },
    575         { "mmm", 33 },
    576         { "mmnop", 333 }
    577     };
    578     builder_->clear();
    579     IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
    580     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
    581         builder_->add(data[i].s, data[i].value, errorCode);
    582     }
    583     StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
    584     BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
    585     checkIterator(iter, data, UPRV_LENGTHOF(data));
    586 }
    587 
    588 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    589     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    590     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    591     logln("checkData(dataLength=%d, small)", (int)dataLength);
    592     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    593 }
    594 
    595 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    596     LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
    597     if(trie.isNull()) {
    598         return;  // buildTrie() reported an error
    599     }
    600     checkFirst(*trie, data, dataLength);
    601     checkNext(*trie, data, dataLength);
    602     checkNextWithState(*trie, data, dataLength);
    603     checkNextString(*trie, data, dataLength);
    604     checkIterator(*trie, data, dataLength);
    605 }
    606 
    607 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    608                                     UStringTrieBuildOption buildOption) {
    609     IcuTestErrorCode errorCode(*this, "buildTrie()");
    610     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    611     int32_t index, step;
    612     if(dataLength&1) {
    613         // Odd number of items.
    614         index=dataLength/2;
    615         step=2;
    616     } else if((dataLength%3)!=0) {
    617         // Not a multiple of 3.
    618         index=dataLength/5;
    619         step=3;
    620     } else {
    621         index=dataLength-1;
    622         step=-1;
    623     }
    624     builder_->clear();
    625     for(int32_t i=0; i<dataLength; ++i) {
    626         builder_->add(data[index].s, data[index].value, errorCode);
    627         index=(index+step)%dataLength;
    628     }
    629     StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
    630     LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
    631     if(!errorCode.logIfFailureAndReset("add()/build()")) {
    632         builder_->add("zzz", 999, errorCode);
    633         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    634             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    635         }
    636     }
    637     logln("serialized trie size: %ld bytes\n", (long)sp.length());
    638     StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
    639     if(sp.data()==sp2.data()) {
    640         errln("builder.buildStringPiece() before & after build() returned same array");
    641     }
    642     if(errorCode.isFailure()) {
    643         return NULL;
    644     }
    645     // Tries from either build() method should be identical but
    646     // BytesTrie does not implement equals().
    647     // We just return either one.
    648     if((dataLength&1)!=0) {
    649         return trie.orphan();
    650     } else {
    651         return new BytesTrie(sp2.data());
    652     }
    653 }
    654 
    655 void BytesTrieTest::checkFirst(BytesTrie &trie,
    656                                const StringAndValue data[], int32_t dataLength) {
    657     for(int32_t i=0; i<dataLength; ++i) {
    658         int c=*data[i].s;
    659         if(c==0) {
    660             continue;  // skip empty string
    661         }
    662         UStringTrieResult firstResult=trie.first(c);
    663         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    664         UStringTrieResult nextResult=trie.next(data[i].s[1]);
    665         if(firstResult!=trie.reset().next(c) ||
    666            firstResult!=trie.current() ||
    667            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    668            nextResult!=trie.next(data[i].s[1])
    669         ) {
    670             errln("trie.first(%c)!=trie.reset().next(same) for %s",
    671                   c, data[i].s);
    672         }
    673     }
    674     trie.reset();
    675 }
    676 
    677 void BytesTrieTest::checkNext(BytesTrie &trie,
    678                               const StringAndValue data[], int32_t dataLength) {
    679     BytesTrie::State state;
    680     for(int32_t i=0; i<dataLength; ++i) {
    681         int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
    682         UStringTrieResult result;
    683         if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
    684             result!=trie.current()
    685         ) {
    686             errln("trie does not seem to contain %s", data[i].s);
    687         } else if(trie.getValue()!=data[i].value) {
    688             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    689                   data[i].s,
    690                   (long)trie.getValue(), (long)trie.getValue(),
    691                   (long)data[i].value, (long)data[i].value);
    692         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    693             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    694         }
    695         trie.reset();
    696         stringLength=strlen(data[i].s);
    697         result=trie.current();
    698         for(int32_t j=0; j<stringLength; ++j) {
    699             if(!USTRINGTRIE_HAS_NEXT(result)) {
    700                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    701                 break;
    702             }
    703             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    704                 trie.getValue();
    705                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    706                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    707                     break;
    708                 }
    709             }
    710             result=trie.next(data[i].s[j]);
    711             if(!USTRINGTRIE_MATCHES(result)) {
    712                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    713                 break;
    714             }
    715             if(result!=trie.current()) {
    716                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    717                 break;
    718             }
    719         }
    720         if(!USTRINGTRIE_HAS_VALUE(result)) {
    721             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    722             continue;
    723         }
    724         trie.getValue();
    725         if(result!=trie.current()) {
    726             errln("trie.current() != current()+getValue()+current() after end of %s",
    727                   data[i].s);
    728         }
    729         // Compare the final current() with whether next() can actually continue.
    730         trie.saveState(state);
    731         UBool nextContinues=FALSE;
    732         // Try all graphic characters; we only use those in test strings in this file.
    733 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    734         const int32_t minChar=0x20;
    735         const int32_t maxChar=0x7e;
    736 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    737         const int32_t minChar=0x40;
    738         const int32_t maxChar=0xfe;
    739 #else
    740         const int32_t minChar=0;
    741         const int32_t maxChar=0xff;
    742 #endif
    743         for(int32_t c=minChar; c<=maxChar; ++c) {
    744             if(trie.resetToState(state).next(c)) {
    745                 nextContinues=TRUE;
    746                 break;
    747             }
    748         }
    749         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    750             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    751                   "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    752         }
    753         trie.reset();
    754     }
    755 }
    756 
    757 void BytesTrieTest::checkNextWithState(BytesTrie &trie,
    758                                        const StringAndValue data[], int32_t dataLength) {
    759     BytesTrie::State noState, state;
    760     for(int32_t i=0; i<dataLength; ++i) {
    761         if((i&1)==0) {
    762             // This should have no effect.
    763             trie.resetToState(noState);
    764         }
    765         const char *expectedString=data[i].s;
    766         int32_t stringLength=strlen(expectedString);
    767         int32_t partialLength=stringLength/3;
    768         for(int32_t j=0; j<partialLength; ++j) {
    769             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    770                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    771                 return;
    772             }
    773         }
    774         trie.saveState(state);
    775         UStringTrieResult resultAtState=trie.current();
    776         UStringTrieResult result;
    777         int32_t valueAtState=-99;
    778         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    779             valueAtState=trie.getValue();
    780         }
    781         result=trie.next(0);  // mismatch
    782         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    783             errln("trie.next(0) matched after part of %s", data[i].s);
    784         }
    785         if( resultAtState!=trie.resetToState(state).current() ||
    786             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    787         ) {
    788             errln("trie.next(part of %s) changes current()/getValue() after "
    789                   "saveState/next(0)/resetToState",
    790                   data[i].s);
    791         } else if(!USTRINGTRIE_HAS_VALUE(
    792                       result=trie.next(expectedString+partialLength,
    793                                        stringLength-partialLength)) ||
    794                   result!=trie.current()) {
    795             errln("trie.next(rest of %s) does not seem to contain %s after "
    796                   "saveState/next(0)/resetToState",
    797                   data[i].s, data[i].s);
    798         } else if(!USTRINGTRIE_HAS_VALUE(
    799                       result=trie.resetToState(state).
    800                                   next(expectedString+partialLength,
    801                                        stringLength-partialLength)) ||
    802                   result!=trie.current()) {
    803             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    804                   data[i].s);
    805         } else if(trie.getValue()!=data[i].value) {
    806             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    807                   data[i].s,
    808                   (long)trie.getValue(), (long)trie.getValue(),
    809                   (long)data[i].value, (long)data[i].value);
    810         }
    811         trie.reset();
    812     }
    813 }
    814 
    815 // next(string) is also tested in other functions,
    816 // but here we try to go partway through the string, and then beyond it.
    817 void BytesTrieTest::checkNextString(BytesTrie &trie,
    818                                     const StringAndValue data[], int32_t dataLength) {
    819     for(int32_t i=0; i<dataLength; ++i) {
    820         const char *expectedString=data[i].s;
    821         int32_t stringLength=strlen(expectedString);
    822         if(!trie.next(expectedString, stringLength/2)) {
    823             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    824             continue;
    825         }
    826         // Test that we stop properly at the end of the string.
    827         if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
    828             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    829         }
    830         trie.reset();
    831     }
    832 }
    833 
    834 void BytesTrieTest::checkIterator(const BytesTrie &trie,
    835                                   const StringAndValue data[], int32_t dataLength) {
    836     IcuTestErrorCode errorCode(*this, "checkIterator()");
    837     BytesTrie::Iterator iter(trie, 0, errorCode);
    838     if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
    839         return;
    840     }
    841     checkIterator(iter, data, dataLength);
    842 }
    843 
    844 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
    845                                   const StringAndValue data[], int32_t dataLength) {
    846     IcuTestErrorCode errorCode(*this, "checkIterator()");
    847     for(int32_t i=0; i<dataLength; ++i) {
    848         if(!iter.hasNext()) {
    849             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
    850             break;
    851         }
    852         UBool hasNext=iter.next(errorCode);
    853         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
    854             break;
    855         }
    856         if(!hasNext) {
    857             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
    858             break;
    859         }
    860         if(iter.getString()!=StringPiece(data[i].s)) {
    861             errln("trie iterator next().getString()=%s but expected %s for item %d",
    862                   iter.getString().data(), data[i].s, (int)i);
    863         }
    864         if(iter.getValue()!=data[i].value) {
    865             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
    866                   (long)iter.getValue(), (long)iter.getValue(),
    867                   (long)data[i].value, (long)data[i].value,
    868                   (int)i, data[i].s);
    869         }
    870     }
    871     if(iter.hasNext()) {
    872         errln("trie iterator hasNext()=TRUE after all items");
    873     }
    874     UBool hasNext=iter.next(errorCode);
    875     errorCode.logIfFailureAndReset("trie iterator next() after all items");
    876     if(hasNext) {
    877         errln("trie iterator next()=TRUE after all items");
    878     }
    879 }
    880