Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  ucharstrietest.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010nov16
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include <string.h>
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/appendable.h"
     19 #include "unicode/localpointer.h"
     20 #include "unicode/ucharstrie.h"
     21 #include "unicode/ucharstriebuilder.h"
     22 #include "unicode/uniset.h"
     23 #include "unicode/unistr.h"
     24 #include "intltest.h"
     25 #include "cmemory.h"
     26 
     27 struct StringAndValue {
     28     const char *s;
     29     int32_t value;
     30 };
     31 
     32 class UCharsTrieTest : public IntlTest {
     33 public:
     34     UCharsTrieTest();
     35     virtual ~UCharsTrieTest();
     36 
     37     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     38     void TestBuilder();
     39     void TestEmpty();
     40     void Test_a();
     41     void Test_a_ab();
     42     void TestShortestBranch();
     43     void TestBranches();
     44     void TestLongSequence();
     45     void TestLongBranch();
     46     void TestValuesForState();
     47     void TestCompact();
     48     void TestFirstForCodePoint();
     49     void TestNextForCodePoint();
     50 
     51     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
     52     void TestLargeTrie();
     53 
     54     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     55     void TestHasUniqueValue();
     56     void TestGetNextUChars();
     57     void TestIteratorFromBranch();
     58     void TestIteratorFromLinearMatch();
     59     void TestTruncatingIteratorFromRoot();
     60     void TestTruncatingIteratorFromLinearMatchShort();
     61     void TestTruncatingIteratorFromLinearMatchLong();
     62     void TestIteratorFromUChars();
     63 
     64     void checkData(const StringAndValue data[], int32_t dataLength);
     65     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     66     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     67                           UStringTrieBuildOption buildOption);
     68     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     69     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     70     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     71     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     72     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     73     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     74 
     75 private:
     76     UCharsTrieBuilder *builder_;
     77 };
     78 
     79 extern IntlTest *createUCharsTrieTest() {
     80     return new UCharsTrieTest();
     81 }
     82 
     83 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
     84     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
     85     builder_=new UCharsTrieBuilder(errorCode);
     86 }
     87 
     88 UCharsTrieTest::~UCharsTrieTest() {
     89     delete builder_;
     90 }
     91 
     92 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     93     if(exec) {
     94         logln("TestSuite UCharsTrieTest: ");
     95     }
     96     TESTCASE_AUTO_BEGIN;
     97     TESTCASE_AUTO(TestBuilder);
     98     TESTCASE_AUTO(TestEmpty);
     99     TESTCASE_AUTO(Test_a);
    100     TESTCASE_AUTO(Test_a_ab);
    101     TESTCASE_AUTO(TestShortestBranch);
    102     TESTCASE_AUTO(TestBranches);
    103     TESTCASE_AUTO(TestLongSequence);
    104     TESTCASE_AUTO(TestLongBranch);
    105     TESTCASE_AUTO(TestValuesForState);
    106     TESTCASE_AUTO(TestCompact);
    107     TESTCASE_AUTO(TestFirstForCodePoint);
    108     TESTCASE_AUTO(TestNextForCodePoint);
    109     TESTCASE_AUTO(TestLargeTrie);
    110     TESTCASE_AUTO(TestHasUniqueValue);
    111     TESTCASE_AUTO(TestGetNextUChars);
    112     TESTCASE_AUTO(TestIteratorFromBranch);
    113     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    114     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    115     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    116     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    117     TESTCASE_AUTO(TestIteratorFromUChars);
    118     TESTCASE_AUTO_END;
    119 }
    120 
    121 void UCharsTrieTest::TestBuilder() {
    122     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    123     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    124     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    125         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    126         return;
    127     }
    128     // TODO: remove .build(...) once add() checks for duplicates.
    129     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    130     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    131         errln("UCharsTrieBuilder.add() did not detect duplicates");
    132         return;
    133     }
    134 }
    135 
    136 void UCharsTrieTest::TestEmpty() {
    137     static const StringAndValue data[]={
    138         { "", 0 }
    139     };
    140     checkData(data, UPRV_LENGTHOF(data));
    141 }
    142 
    143 void UCharsTrieTest::Test_a() {
    144     static const StringAndValue data[]={
    145         { "a", 1 }
    146     };
    147     checkData(data, UPRV_LENGTHOF(data));
    148 }
    149 
    150 void UCharsTrieTest::Test_a_ab() {
    151     static const StringAndValue data[]={
    152         { "a", 1 },
    153         { "ab", 100 }
    154     };
    155     checkData(data, UPRV_LENGTHOF(data));
    156 }
    157 
    158 void UCharsTrieTest::TestShortestBranch() {
    159     static const StringAndValue data[]={
    160         { "a", 1000 },
    161         { "b", 2000 }
    162     };
    163     checkData(data, UPRV_LENGTHOF(data));
    164 }
    165 
    166 void UCharsTrieTest::TestBranches() {
    167     static const StringAndValue data[]={
    168         { "a", 0x10 },
    169         { "cc", 0x40 },
    170         { "e", 0x100 },
    171         { "ggg", 0x400 },
    172         { "i", 0x1000 },
    173         { "kkkk", 0x4000 },
    174         { "n", 0x10000 },
    175         { "ppppp", 0x40000 },
    176         { "r", 0x100000 },
    177         { "sss", 0x200000 },
    178         { "t", 0x400000 },
    179         { "uu", 0x800000 },
    180         { "vv", 0x7fffffff },
    181         { "zz", (int32_t)0x80000000 }
    182     };
    183     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
    184         logln("TestBranches length=%d", (int)length);
    185         checkData(data, length);
    186     }
    187 }
    188 
    189 void UCharsTrieTest::TestLongSequence() {
    190     static const StringAndValue data[]={
    191         { "a", -1 },
    192         // sequence of linear-match nodes
    193         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    194         // more than 256 units
    195         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    196           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    197           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    198           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    201     };
    202     checkData(data, UPRV_LENGTHOF(data));
    203 }
    204 
    205 void UCharsTrieTest::TestLongBranch() {
    206     // Split-branch and interesting compact-integer values.
    207     static const StringAndValue data[]={
    208         { "a", -2 },
    209         { "b", -1 },
    210         { "c", 0 },
    211         { "d2", 1 },
    212         { "f", 0x3f },
    213         { "g", 0x40 },
    214         { "h", 0x41 },
    215         { "j23", 0x1900 },
    216         { "j24", 0x19ff },
    217         { "j25", 0x1a00 },
    218         { "k2", 0x1a80 },
    219         { "k3", 0x1aff },
    220         { "l234567890", 0x1b00 },
    221         { "l234567890123", 0x1b01 },
    222         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    223         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    224         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    225         { "r", 0x333333 },
    226         { "s2345", 0x4444444 },
    227         { "t234567890", 0x77777777 },
    228         { "z", (int32_t)0x80000001 }
    229     };
    230     checkData(data, UPRV_LENGTHOF(data));
    231 }
    232 
    233 void UCharsTrieTest::TestValuesForState() {
    234     // Check that saveState() and resetToState() interact properly
    235     // with next() and current().
    236     static const StringAndValue data[]={
    237         { "a", -1 },
    238         { "ab", -2 },
    239         { "abc", -3 },
    240         { "abcd", -4 },
    241         { "abcde", -5 },
    242         { "abcdef", -6 }
    243     };
    244     checkData(data, UPRV_LENGTHOF(data));
    245 }
    246 
    247 void UCharsTrieTest::TestCompact() {
    248     // Duplicate trailing strings and values provide opportunities for compacting.
    249     static const StringAndValue data[]={
    250         { "+", 0 },
    251         { "+august", 8 },
    252         { "+december", 12 },
    253         { "+july", 7 },
    254         { "+june", 6 },
    255         { "+november", 11 },
    256         { "+october", 10 },
    257         { "+september", 9 },
    258         { "-", 0 },
    259         { "-august", 8 },
    260         { "-december", 12 },
    261         { "-july", 7 },
    262         { "-june", 6 },
    263         { "-november", 11 },
    264         { "-october", 10 },
    265         { "-september", 9 },
    266         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    267         // both times because each time it follows a different linear-match node.
    268         { "xjuly", 7 },
    269         { "xjune", 6 }
    270     };
    271     checkData(data, UPRV_LENGTHOF(data));
    272 }
    273 
    274 void UCharsTrieTest::TestFirstForCodePoint() {
    275     static const StringAndValue data[]={
    276         { "a", 1 },
    277         { "a\\ud800", 2 },
    278         { "a\\U00010000", 3 },
    279         { "\\ud840", 4 },
    280         { "\\U00020000\\udbff", 5 },
    281         { "\\U00020000\\U0010ffff", 6 },
    282         { "\\U00020000\\U0010ffffz", 7 },
    283         { "\\U00050000xy", 8 },
    284         { "\\U00050000xyz", 9 }
    285     };
    286     checkData(data, UPRV_LENGTHOF(data));
    287 }
    288 
    289 void UCharsTrieTest::TestNextForCodePoint() {
    290     static const StringAndValue data[]={
    291         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
    292         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
    293         { "\\u4dff\\U000103ff", 99999 }
    294     };
    295     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    296     if(trie.isNull()) {
    297         return;  // buildTrie() reported an error
    298     }
    299     UStringTrieResult result;
    300     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    301         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    302         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    303         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    304         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    305         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    306         trie->getValue()!=2000000000
    307     ) {
    308         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
    309     }
    310     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    311         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    312         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    313         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    314         trie->getValue()!=44444
    315     ) {
    316         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
    317     }
    318     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    319         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    320         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    321         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
    322     ) {
    323         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
    324     }
    325     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    326         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    327         trie->getValue()!=99999
    328     ) {
    329         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
    330     }
    331 }
    332 
    333 // Definitions in the anonymous namespace are invisible outside this file.
    334 namespace {
    335 
    336 // Generate (string, value) pairs.
    337 // The first string (before next()) will be empty.
    338 class Generator {
    339 public:
    340     Generator() : value(4711), num(0) {}
    341     void next() {
    342         UChar c;
    343         s.truncate(0);
    344         s.append(c=(UChar)(value>>16));
    345         s.append((UChar)(value>>4));
    346         if(value&1) {
    347             s.append((UChar)value);
    348         }
    349         set.add(c);
    350         value+=((value>>5)&0x7ff)*3+1;
    351         ++num;
    352     }
    353     const UnicodeString &getString() const { return s; }
    354     int32_t getValue() const { return value; }
    355     int32_t countUniqueFirstChars() const { return set.size(); }
    356     int32_t getIndex() const { return num; }
    357 
    358 private:
    359     UnicodeString s;
    360     UnicodeSet set;
    361     int32_t value;
    362     int32_t num;
    363 };
    364 
    365 }  // end namespace
    366 
    367 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
    368     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
    369     Generator gen;
    370     builder_->clear();
    371     while(gen.countUniqueFirstChars()<numUniqueFirst) {
    372         builder_->add(gen.getString(), gen.getValue(), errorCode);
    373         gen.next();
    374     }
    375     logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
    376     UnicodeString trieUChars;
    377     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    378     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    379     return new UCharsTrie(trieUChars.getBuffer());
    380 }
    381 
    382 // Exercise a large branch node.
    383 void UCharsTrieTest::TestLargeTrie() {
    384     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
    385     if(trie.isNull()) {
    386         return;  // buildTrie() reported an error
    387     }
    388     Generator gen;
    389     while(gen.countUniqueFirstChars()<1111) {
    390         UnicodeString x(gen.getString());
    391         int32_t value=gen.getValue();
    392         if(!x.isEmpty()) {
    393             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
    394                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
    395                       x[0], (long)gen.getIndex());
    396                 break;
    397             }
    398             x.remove(0, 1);
    399         }
    400         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
    401         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
    402             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
    403                   "next()!=current() or getValue() wrong "
    404                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
    405             break;
    406         }
    407         gen.next();
    408     }
    409 }
    410 
    411 enum {
    412     u_a=0x61,
    413     u_b=0x62,
    414     u_c=0x63,
    415     u_j=0x6a,
    416     u_n=0x6e,
    417     u_r=0x72,
    418     u_u=0x75,
    419     u_y=0x79
    420 };
    421 
    422 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    423     // All types of nodes leading to the same value,
    424     // for code coverage of recursive functions.
    425     // In particular, we need a lot of branches on some single level
    426     // to exercise a split-branch node.
    427     static const StringAndValue data[]={
    428         { "august", 8 },
    429         { "jan", 1 },
    430         { "jan.", 1 },
    431         { "jana", 1 },
    432         { "janbb", 1 },
    433         { "janc", 1 },
    434         { "janddd", 1 },
    435         { "janee", 1 },
    436         { "janef", 1 },
    437         { "janf", 1 },
    438         { "jangg", 1 },
    439         { "janh", 1 },
    440         { "janiiii", 1 },
    441         { "janj", 1 },
    442         { "jankk", 1 },
    443         { "jankl", 1 },
    444         { "jankmm", 1 },
    445         { "janl", 1 },
    446         { "janm", 1 },
    447         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    448         { "jano", 1 },
    449         { "janpp", 1 },
    450         { "janqqq", 1 },
    451         { "janr", 1 },
    452         { "januar", 1 },
    453         { "january", 1 },
    454         { "july", 7 },
    455         { "jun", 6 },
    456         { "jun.", 6 },
    457         { "june", 6 }
    458     };
    459     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
    460 }
    461 
    462 void UCharsTrieTest::TestHasUniqueValue() {
    463     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    464     if(trie.isNull()) {
    465         return;  // buildTrie() reported an error
    466     }
    467     int32_t uniqueValue;
    468     if(trie->hasUniqueValue(uniqueValue)) {
    469         errln("unique value at root");
    470     }
    471     trie->next(u_j);
    472     trie->next(u_a);
    473     trie->next(u_n);
    474     // hasUniqueValue() directly after next()
    475     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    476         errln("not unique value 1 after \"jan\"");
    477     }
    478     trie->first(u_j);
    479     trie->next(u_u);
    480     if(trie->hasUniqueValue(uniqueValue)) {
    481         errln("unique value after \"ju\"");
    482     }
    483     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    484         errln("not normal value 6 after \"jun\"");
    485     }
    486     // hasUniqueValue() after getValue()
    487     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    488         errln("not unique value 6 after \"jun\"");
    489     }
    490     // hasUniqueValue() from within a linear-match node
    491     trie->first(u_a);
    492     trie->next(u_u);
    493     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    494         errln("not unique value 8 after \"au\"");
    495     }
    496 }
    497 
    498 void UCharsTrieTest::TestGetNextUChars() {
    499     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    500     if(trie.isNull()) {
    501         return;  // buildTrie() reported an error
    502     }
    503     UnicodeString buffer;
    504     UnicodeStringAppendable app(buffer);
    505     int32_t count=trie->getNextUChars(app);
    506     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
    507         errln("months getNextUChars()!=[aj] at root");
    508     }
    509     trie->next(u_j);
    510     trie->next(u_a);
    511     trie->next(u_n);
    512     // getNextUChars() directly after next()
    513     buffer.remove();
    514     count=trie->getNextUChars(app);
    515     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    516         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
    517     }
    518     // getNextUChars() after getValue()
    519     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    520     buffer.remove();
    521     count=trie->getNextUChars(app);
    522     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    523         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    524     }
    525     // getNextUChars() from a linear-match node
    526     trie->next(u_u);
    527     buffer.remove();
    528     count=trie->getNextUChars(app);
    529     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
    530         errln("months getNextUChars()!=[a] after \"janu\"");
    531     }
    532     trie->next(u_a);
    533     buffer.remove();
    534     count=trie->getNextUChars(app);
    535     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
    536         errln("months getNextUChars()!=[r] after \"janua\"");
    537     }
    538     trie->next(u_r);
    539     trie->next(u_y);
    540     // getNextUChars() after a final match
    541     buffer.remove();
    542     count=trie->getNextUChars(app);
    543     if(count!=0 || buffer.length()!=0) {
    544         errln("months getNextUChars()!=[] after \"january\"");
    545     }
    546 }
    547 
    548 void UCharsTrieTest::TestIteratorFromBranch() {
    549     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    550     if(trie.isNull()) {
    551         return;  // buildTrie() reported an error
    552     }
    553     // Go to a branch node.
    554     trie->next(u_j);
    555     trie->next(u_a);
    556     trie->next(u_n);
    557     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    558     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    559     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    560         return;
    561     }
    562     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    563     // following "jan".
    564     static const StringAndValue data[]={
    565         { "", 1 },
    566         { ".", 1 },
    567         { "a", 1 },
    568         { "bb", 1 },
    569         { "c", 1 },
    570         { "ddd", 1 },
    571         { "ee", 1 },
    572         { "ef", 1 },
    573         { "f", 1 },
    574         { "gg", 1 },
    575         { "h", 1 },
    576         { "iiii", 1 },
    577         { "j", 1 },
    578         { "kk", 1 },
    579         { "kl", 1 },
    580         { "kmm", 1 },
    581         { "l", 1 },
    582         { "m", 1 },
    583         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    584         { "o", 1 },
    585         { "pp", 1 },
    586         { "qqq", 1 },
    587         { "r", 1 },
    588         { "uar", 1 },
    589         { "uary", 1 }
    590     };
    591     checkIterator(iter, data, UPRV_LENGTHOF(data));
    592     // Reset, and we should get the same result.
    593     logln("after iter.reset()");
    594     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    595 }
    596 
    597 void UCharsTrieTest::TestIteratorFromLinearMatch() {
    598     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    599     if(trie.isNull()) {
    600         return;  // buildTrie() reported an error
    601     }
    602     // Go into a linear-match node.
    603     trie->next(u_j);
    604     trie->next(u_a);
    605     trie->next(u_n);
    606     trie->next(u_u);
    607     trie->next(u_a);
    608     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    609     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    610     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    611         return;
    612     }
    613     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    614     // following "janua".
    615     static const StringAndValue data[]={
    616         { "r", 1 },
    617         { "ry", 1 }
    618     };
    619     checkIterator(iter, data, UPRV_LENGTHOF(data));
    620     // Reset, and we should get the same result.
    621     logln("after iter.reset()");
    622     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    623 }
    624 
    625 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
    626     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    627     if(trie.isNull()) {
    628         return;  // buildTrie() reported an error
    629     }
    630     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    631     UCharsTrie::Iterator iter(*trie, 4, errorCode);
    632     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    633         return;
    634     }
    635     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    636     // of each string, and no string duplicates from the truncation.
    637     static const StringAndValue data[]={
    638         { "augu", -1 },
    639         { "jan", 1 },
    640         { "jan.", 1 },
    641         { "jana", 1 },
    642         { "janb", -1 },
    643         { "janc", 1 },
    644         { "jand", -1 },
    645         { "jane", -1 },
    646         { "janf", 1 },
    647         { "jang", -1 },
    648         { "janh", 1 },
    649         { "jani", -1 },
    650         { "janj", 1 },
    651         { "jank", -1 },
    652         { "janl", 1 },
    653         { "janm", 1 },
    654         { "jann", -1 },
    655         { "jano", 1 },
    656         { "janp", -1 },
    657         { "janq", -1 },
    658         { "janr", 1 },
    659         { "janu", -1 },
    660         { "july", 7 },
    661         { "jun", 6 },
    662         { "jun.", 6 },
    663         { "june", 6 }
    664     };
    665     checkIterator(iter, data, UPRV_LENGTHOF(data));
    666     // Reset, and we should get the same result.
    667     logln("after iter.reset()");
    668     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    669 }
    670 
    671 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    672     static const StringAndValue data[]={
    673         { "abcdef", 10 },
    674         { "abcdepq", 200 },
    675         { "abcdeyz", 3000 }
    676     };
    677     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    678     if(trie.isNull()) {
    679         return;  // buildTrie() reported an error
    680     }
    681     // Go into a linear-match node.
    682     trie->next(u_a);
    683     trie->next(u_b);
    684     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    685     // Truncate within the linear-match node.
    686     UCharsTrie::Iterator iter(*trie, 2, errorCode);
    687     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    688         return;
    689     }
    690     static const StringAndValue expected[]={
    691         { "cd", -1 }
    692     };
    693     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    694     // Reset, and we should get the same result.
    695     logln("after iter.reset()");
    696     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    697 }
    698 
    699 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    700     static const StringAndValue data[]={
    701         { "abcdef", 10 },
    702         { "abcdepq", 200 },
    703         { "abcdeyz", 3000 }
    704     };
    705     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    706     if(trie.isNull()) {
    707         return;  // buildTrie() reported an error
    708     }
    709     // Go into a linear-match node.
    710     trie->next(u_a);
    711     trie->next(u_b);
    712     trie->next(u_c);
    713     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    714     // Truncate after the linear-match node.
    715     UCharsTrie::Iterator iter(*trie, 3, errorCode);
    716     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    717         return;
    718     }
    719     static const StringAndValue expected[]={
    720         { "def", 10 },
    721         { "dep", -1 },
    722         { "dey", -1 }
    723     };
    724     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    725     // Reset, and we should get the same result.
    726     logln("after iter.reset()");
    727     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    728 }
    729 
    730 void UCharsTrieTest::TestIteratorFromUChars() {
    731     static const StringAndValue data[]={
    732         { "mm", 3 },
    733         { "mmm", 33 },
    734         { "mmnop", 333 }
    735     };
    736     builder_->clear();
    737     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
    738     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
    739         builder_->add(data[i].s, data[i].value, errorCode);
    740     }
    741     UnicodeString trieUChars;
    742     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    743     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
    744     checkIterator(iter, data, UPRV_LENGTHOF(data));
    745 }
    746 
    747 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    748     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    749     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    750     logln("checkData(dataLength=%d, small)", (int)dataLength);
    751     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    752 }
    753 
    754 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    755     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
    756     if(trie.isNull()) {
    757         return;  // buildTrie() reported an error
    758     }
    759     checkFirst(*trie, data, dataLength);
    760     checkNext(*trie, data, dataLength);
    761     checkNextWithState(*trie, data, dataLength);
    762     checkNextString(*trie, data, dataLength);
    763     checkIterator(*trie, data, dataLength);
    764 }
    765 
    766 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    767                                       UStringTrieBuildOption buildOption) {
    768     IcuTestErrorCode errorCode(*this, "buildTrie()");
    769     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    770     int32_t index, step;
    771     if(dataLength&1) {
    772         // Odd number of items.
    773         index=dataLength/2;
    774         step=2;
    775     } else if((dataLength%3)!=0) {
    776         // Not a multiple of 3.
    777         index=dataLength/5;
    778         step=3;
    779     } else {
    780         index=dataLength-1;
    781         step=-1;
    782     }
    783     builder_->clear();
    784     for(int32_t i=0; i<dataLength; ++i) {
    785         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
    786                       data[index].value, errorCode);
    787         index=(index+step)%dataLength;
    788     }
    789     UnicodeString trieUChars;
    790     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
    791     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
    792     if(!errorCode.logIfFailureAndReset("add()/build()")) {
    793         builder_->add("zzz", 999, errorCode);
    794         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    795             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    796         }
    797     }
    798     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    799     UnicodeString trieUChars2;
    800     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
    801     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
    802         errln("builder.buildUnicodeString() before & after build() returned same array");
    803     }
    804     if(errorCode.isFailure()) {
    805         return NULL;
    806     }
    807     // Tries from either build() method should be identical but
    808     // UCharsTrie does not implement equals().
    809     // We just return either one.
    810     if((dataLength&1)!=0) {
    811         return trie.orphan();
    812     } else {
    813         return new UCharsTrie(trieUChars2.getBuffer());
    814     }
    815 }
    816 
    817 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
    818                                 const StringAndValue data[], int32_t dataLength) {
    819     for(int32_t i=0; i<dataLength; ++i) {
    820         if(*data[i].s==0) {
    821             continue;  // skip empty string
    822         }
    823         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    824         UChar32 c=expectedString[0];
    825         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
    826         UStringTrieResult firstResult=trie.first(c);
    827         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    828         UStringTrieResult nextResult=trie.next(nextCp);
    829         if(firstResult!=trie.reset().next(c) ||
    830            firstResult!=trie.current() ||
    831            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    832            nextResult!=trie.next(nextCp)
    833         ) {
    834             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
    835                   c, data[i].s);
    836         }
    837         c=expectedString.char32At(0);
    838         int32_t cLength=U16_LENGTH(c);
    839         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
    840         firstResult=trie.firstForCodePoint(c);
    841         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    842         nextResult=trie.nextForCodePoint(nextCp);
    843         if(firstResult!=trie.reset().nextForCodePoint(c) ||
    844            firstResult!=trie.current() ||
    845            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    846            nextResult!=trie.nextForCodePoint(nextCp)
    847         ) {
    848             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
    849                   c, data[i].s);
    850         }
    851     }
    852     trie.reset();
    853 }
    854 
    855 void UCharsTrieTest::checkNext(UCharsTrie &trie,
    856                                const StringAndValue data[], int32_t dataLength) {
    857     UCharsTrie::State state;
    858     for(int32_t i=0; i<dataLength; ++i) {
    859         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    860         int32_t stringLength= (i&1) ? -1 : expectedString.length();
    861         UStringTrieResult result;
    862         if( !USTRINGTRIE_HAS_VALUE(
    863                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
    864             result!=trie.current()
    865         ) {
    866             errln("trie does not seem to contain %s", data[i].s);
    867         } else if(trie.getValue()!=data[i].value) {
    868             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    869                   data[i].s,
    870                   (long)trie.getValue(), (long)trie.getValue(),
    871                   (long)data[i].value, (long)data[i].value);
    872         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    873             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    874         }
    875         trie.reset();
    876         stringLength=expectedString.length();
    877         result=trie.current();
    878         for(int32_t j=0; j<stringLength; ++j) {
    879             if(!USTRINGTRIE_HAS_NEXT(result)) {
    880                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    881                 break;
    882             }
    883             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    884                 trie.getValue();
    885                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    886                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    887                     break;
    888                 }
    889             }
    890             result=trie.next(expectedString[j]);
    891             if(!USTRINGTRIE_MATCHES(result)) {
    892                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    893                 break;
    894             }
    895             if(result!=trie.current()) {
    896                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    897                 break;
    898             }
    899         }
    900         if(!USTRINGTRIE_HAS_VALUE(result)) {
    901             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    902             continue;
    903         }
    904         trie.getValue();
    905         if(result!=trie.current()) {
    906             errln("trie.current() != current()+getValue()+current() after end of %s",
    907                   data[i].s);
    908         }
    909         // Compare the final current() with whether next() can actually continue.
    910         trie.saveState(state);
    911         UBool nextContinues=FALSE;
    912         for(int32_t c=0x20; c<0xe000; ++c) {
    913             if(c==0x80) {
    914                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
    915             }
    916             if(trie.resetToState(state).next(c)) {
    917                 nextContinues=TRUE;
    918                 break;
    919             }
    920         }
    921         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    922             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    923                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    924         }
    925         trie.reset();
    926     }
    927 }
    928 
    929 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
    930                                         const StringAndValue data[], int32_t dataLength) {
    931     UCharsTrie::State noState, state;
    932     for(int32_t i=0; i<dataLength; ++i) {
    933         if((i&1)==0) {
    934             // This should have no effect.
    935             trie.resetToState(noState);
    936         }
    937         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    938         int32_t stringLength=expectedString.length();
    939         int32_t partialLength=stringLength/3;
    940         for(int32_t j=0; j<partialLength; ++j) {
    941             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    942                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    943                 return;
    944             }
    945         }
    946         trie.saveState(state);
    947         UStringTrieResult resultAtState=trie.current();
    948         UStringTrieResult result;
    949         int32_t valueAtState=-99;
    950         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    951             valueAtState=trie.getValue();
    952         }
    953         result=trie.next(0);  // mismatch
    954         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    955             errln("trie.next(0) matched after part of %s", data[i].s);
    956         }
    957         if( resultAtState!=trie.resetToState(state).current() ||
    958             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    959         ) {
    960             errln("trie.next(part of %s) changes current()/getValue() after "
    961                   "saveState/next(0)/resetToState",
    962                   data[i].s);
    963         } else if(!USTRINGTRIE_HAS_VALUE(
    964                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
    965                                        stringLength-partialLength)) ||
    966                   result!=trie.current()) {
    967             errln("trie.next(rest of %s) does not seem to contain %s after "
    968                   "saveState/next(0)/resetToState",
    969                   data[i].s, data[i].s);
    970         } else if(!USTRINGTRIE_HAS_VALUE(
    971                       result=trie.resetToState(state).
    972                                   next(expectedString.getTerminatedBuffer()+partialLength,
    973                                        stringLength-partialLength)) ||
    974                   result!=trie.current()) {
    975             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    976                   data[i].s);
    977         } else if(trie.getValue()!=data[i].value) {
    978             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    979                   data[i].s,
    980                   (long)trie.getValue(), (long)trie.getValue(),
    981                   (long)data[i].value, (long)data[i].value);
    982         }
    983         trie.reset();
    984     }
    985 }
    986 
    987 // next(string) is also tested in other functions,
    988 // but here we try to go partway through the string, and then beyond it.
    989 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
    990                                      const StringAndValue data[], int32_t dataLength) {
    991     for(int32_t i=0; i<dataLength; ++i) {
    992         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    993         int32_t stringLength=expectedString.length();
    994         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
    995             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    996             continue;
    997         }
    998         // Test that we stop properly at the end of the string.
    999         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
   1000                      stringLength+1-stringLength/2)) {
   1001             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
   1002         }
   1003         trie.reset();
   1004     }
   1005 }
   1006 
   1007 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
   1008                                    const StringAndValue data[], int32_t dataLength) {
   1009     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1010     UCharsTrie::Iterator iter(trie, 0, errorCode);
   1011     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
   1012         return;
   1013     }
   1014     checkIterator(iter, data, dataLength);
   1015 }
   1016 
   1017 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
   1018                                    const StringAndValue data[], int32_t dataLength) {
   1019     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1020     for(int32_t i=0; i<dataLength; ++i) {
   1021         if(!iter.hasNext()) {
   1022             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
   1023             break;
   1024         }
   1025         UBool hasNext=iter.next(errorCode);
   1026         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
   1027             break;
   1028         }
   1029         if(!hasNext) {
   1030             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
   1031             break;
   1032         }
   1033         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
   1034         if(iter.getString()!=expectedString) {
   1035             char buffer[1000];
   1036             UnicodeString invString(prettify(iter.getString()));
   1037             invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
   1038             errln("trie iterator next().getString()=%s but expected %s for item %d",
   1039                   buffer, data[i].s, (int)i);
   1040         }
   1041         if(iter.getValue()!=data[i].value) {
   1042             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
   1043                   (long)iter.getValue(), (long)iter.getValue(),
   1044                   (long)data[i].value, (long)data[i].value,
   1045                   (int)i, data[i].s);
   1046         }
   1047     }
   1048     if(iter.hasNext()) {
   1049         errln("trie iterator hasNext()=TRUE after all items");
   1050     }
   1051     UBool hasNext=iter.next(errorCode);
   1052     errorCode.logIfFailureAndReset("trie iterator next() after all items");
   1053     if(hasNext) {
   1054         errln("trie iterator next()=TRUE after all items");
   1055     }
   1056 }
   1057