Home | History | Annotate | Download | only in intltest
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  ucharstrietest.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010nov16
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include <string.h>
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/appendable.h"
     21 #include "unicode/localpointer.h"
     22 #include "unicode/ucharstrie.h"
     23 #include "unicode/ucharstriebuilder.h"
     24 #include "unicode/uniset.h"
     25 #include "unicode/unistr.h"
     26 #include "intltest.h"
     27 #include "cmemory.h"
     28 
     29 struct StringAndValue {
     30     const char *s;
     31     int32_t value;
     32 };
     33 
     34 class UCharsTrieTest : public IntlTest {
     35 public:
     36     UCharsTrieTest();
     37     virtual ~UCharsTrieTest();
     38 
     39     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     40     void TestBuilder();
     41     void TestEmpty();
     42     void Test_a();
     43     void Test_a_ab();
     44     void TestShortestBranch();
     45     void TestBranches();
     46     void TestLongSequence();
     47     void TestLongBranch();
     48     void TestValuesForState();
     49     void TestCompact();
     50     void TestFirstForCodePoint();
     51     void TestNextForCodePoint();
     52 
     53     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
     54     void TestLargeTrie();
     55 
     56     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     57     void TestHasUniqueValue();
     58     void TestGetNextUChars();
     59     void TestIteratorFromBranch();
     60     void TestIteratorFromLinearMatch();
     61     void TestTruncatingIteratorFromRoot();
     62     void TestTruncatingIteratorFromLinearMatchShort();
     63     void TestTruncatingIteratorFromLinearMatchLong();
     64     void TestIteratorFromUChars();
     65 
     66     void checkData(const StringAndValue data[], int32_t dataLength);
     67     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     68     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     69                           UStringTrieBuildOption buildOption);
     70     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     71     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     72     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     73     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     74     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     75     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     76 
     77 private:
     78     UCharsTrieBuilder *builder_;
     79 };
     80 
     81 extern IntlTest *createUCharsTrieTest() {
     82     return new UCharsTrieTest();
     83 }
     84 
     85 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
     86     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
     87     builder_=new UCharsTrieBuilder(errorCode);
     88 }
     89 
     90 UCharsTrieTest::~UCharsTrieTest() {
     91     delete builder_;
     92 }
     93 
     94 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     95     if(exec) {
     96         logln("TestSuite UCharsTrieTest: ");
     97     }
     98     TESTCASE_AUTO_BEGIN;
     99     TESTCASE_AUTO(TestBuilder);
    100     TESTCASE_AUTO(TestEmpty);
    101     TESTCASE_AUTO(Test_a);
    102     TESTCASE_AUTO(Test_a_ab);
    103     TESTCASE_AUTO(TestShortestBranch);
    104     TESTCASE_AUTO(TestBranches);
    105     TESTCASE_AUTO(TestLongSequence);
    106     TESTCASE_AUTO(TestLongBranch);
    107     TESTCASE_AUTO(TestValuesForState);
    108     TESTCASE_AUTO(TestCompact);
    109     TESTCASE_AUTO(TestFirstForCodePoint);
    110     TESTCASE_AUTO(TestNextForCodePoint);
    111     TESTCASE_AUTO(TestLargeTrie);
    112     TESTCASE_AUTO(TestHasUniqueValue);
    113     TESTCASE_AUTO(TestGetNextUChars);
    114     TESTCASE_AUTO(TestIteratorFromBranch);
    115     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    116     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    117     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    118     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    119     TESTCASE_AUTO(TestIteratorFromUChars);
    120     TESTCASE_AUTO_END;
    121 }
    122 
    123 void UCharsTrieTest::TestBuilder() {
    124     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    125     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    126     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    127         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    128         return;
    129     }
    130     // TODO: remove .build(...) once add() checks for duplicates.
    131     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    132     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    133         errln("UCharsTrieBuilder.add() did not detect duplicates");
    134         return;
    135     }
    136 }
    137 
    138 void UCharsTrieTest::TestEmpty() {
    139     static const StringAndValue data[]={
    140         { "", 0 }
    141     };
    142     checkData(data, UPRV_LENGTHOF(data));
    143 }
    144 
    145 void UCharsTrieTest::Test_a() {
    146     static const StringAndValue data[]={
    147         { "a", 1 }
    148     };
    149     checkData(data, UPRV_LENGTHOF(data));
    150 }
    151 
    152 void UCharsTrieTest::Test_a_ab() {
    153     static const StringAndValue data[]={
    154         { "a", 1 },
    155         { "ab", 100 }
    156     };
    157     checkData(data, UPRV_LENGTHOF(data));
    158 }
    159 
    160 void UCharsTrieTest::TestShortestBranch() {
    161     static const StringAndValue data[]={
    162         { "a", 1000 },
    163         { "b", 2000 }
    164     };
    165     checkData(data, UPRV_LENGTHOF(data));
    166 }
    167 
    168 void UCharsTrieTest::TestBranches() {
    169     static const StringAndValue data[]={
    170         { "a", 0x10 },
    171         { "cc", 0x40 },
    172         { "e", 0x100 },
    173         { "ggg", 0x400 },
    174         { "i", 0x1000 },
    175         { "kkkk", 0x4000 },
    176         { "n", 0x10000 },
    177         { "ppppp", 0x40000 },
    178         { "r", 0x100000 },
    179         { "sss", 0x200000 },
    180         { "t", 0x400000 },
    181         { "uu", 0x800000 },
    182         { "vv", 0x7fffffff },
    183         { "zz", (int32_t)0x80000000 }
    184     };
    185     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
    186         logln("TestBranches length=%d", (int)length);
    187         checkData(data, length);
    188     }
    189 }
    190 
    191 void UCharsTrieTest::TestLongSequence() {
    192     static const StringAndValue data[]={
    193         { "a", -1 },
    194         // sequence of linear-match nodes
    195         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    196         // more than 256 units
    197         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    198           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    201           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    202           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    203     };
    204     checkData(data, UPRV_LENGTHOF(data));
    205 }
    206 
    207 void UCharsTrieTest::TestLongBranch() {
    208     // Split-branch and interesting compact-integer values.
    209     static const StringAndValue data[]={
    210         { "a", -2 },
    211         { "b", -1 },
    212         { "c", 0 },
    213         { "d2", 1 },
    214         { "f", 0x3f },
    215         { "g", 0x40 },
    216         { "h", 0x41 },
    217         { "j23", 0x1900 },
    218         { "j24", 0x19ff },
    219         { "j25", 0x1a00 },
    220         { "k2", 0x1a80 },
    221         { "k3", 0x1aff },
    222         { "l234567890", 0x1b00 },
    223         { "l234567890123", 0x1b01 },
    224         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    225         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    226         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    227         { "r", 0x333333 },
    228         { "s2345", 0x4444444 },
    229         { "t234567890", 0x77777777 },
    230         { "z", (int32_t)0x80000001 }
    231     };
    232     checkData(data, UPRV_LENGTHOF(data));
    233 }
    234 
    235 void UCharsTrieTest::TestValuesForState() {
    236     // Check that saveState() and resetToState() interact properly
    237     // with next() and current().
    238     static const StringAndValue data[]={
    239         { "a", -1 },
    240         { "ab", -2 },
    241         { "abc", -3 },
    242         { "abcd", -4 },
    243         { "abcde", -5 },
    244         { "abcdef", -6 }
    245     };
    246     checkData(data, UPRV_LENGTHOF(data));
    247 }
    248 
    249 void UCharsTrieTest::TestCompact() {
    250     // Duplicate trailing strings and values provide opportunities for compacting.
    251     static const StringAndValue data[]={
    252         { "+", 0 },
    253         { "+august", 8 },
    254         { "+december", 12 },
    255         { "+july", 7 },
    256         { "+june", 6 },
    257         { "+november", 11 },
    258         { "+october", 10 },
    259         { "+september", 9 },
    260         { "-", 0 },
    261         { "-august", 8 },
    262         { "-december", 12 },
    263         { "-july", 7 },
    264         { "-june", 6 },
    265         { "-november", 11 },
    266         { "-october", 10 },
    267         { "-september", 9 },
    268         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    269         // both times because each time it follows a different linear-match node.
    270         { "xjuly", 7 },
    271         { "xjune", 6 }
    272     };
    273     checkData(data, UPRV_LENGTHOF(data));
    274 }
    275 
    276 void UCharsTrieTest::TestFirstForCodePoint() {
    277     static const StringAndValue data[]={
    278         { "a", 1 },
    279         { "a\\ud800", 2 },
    280         { "a\\U00010000", 3 },
    281         { "\\ud840", 4 },
    282         { "\\U00020000\\udbff", 5 },
    283         { "\\U00020000\\U0010ffff", 6 },
    284         { "\\U00020000\\U0010ffffz", 7 },
    285         { "\\U00050000xy", 8 },
    286         { "\\U00050000xyz", 9 }
    287     };
    288     checkData(data, UPRV_LENGTHOF(data));
    289 }
    290 
    291 void UCharsTrieTest::TestNextForCodePoint() {
    292     static const StringAndValue data[]={
    293         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
    294         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
    295         { "\\u4dff\\U000103ff", 99999 }
    296     };
    297     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    298     if(trie.isNull()) {
    299         return;  // buildTrie() reported an error
    300     }
    301     UStringTrieResult result;
    302     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    303         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    304         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    305         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    306         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    307         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    308         trie->getValue()!=2000000000
    309     ) {
    310         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
    311     }
    312     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    313         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    314         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    315         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    316         trie->getValue()!=44444
    317     ) {
    318         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
    319     }
    320     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    321         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    322         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    323         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
    324     ) {
    325         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
    326     }
    327     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    328         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    329         trie->getValue()!=99999
    330     ) {
    331         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
    332     }
    333 }
    334 
    335 // Definitions in the anonymous namespace are invisible outside this file.
    336 namespace {
    337 
    338 // Generate (string, value) pairs.
    339 // The first string (before next()) will be empty.
    340 class Generator {
    341 public:
    342     Generator() : value(4711), num(0) {}
    343     void next() {
    344         UChar c;
    345         s.truncate(0);
    346         s.append(c=(UChar)(value>>16));
    347         s.append((UChar)(value>>4));
    348         if(value&1) {
    349             s.append((UChar)value);
    350         }
    351         set.add(c);
    352         value+=((value>>5)&0x7ff)*3+1;
    353         ++num;
    354     }
    355     const UnicodeString &getString() const { return s; }
    356     int32_t getValue() const { return value; }
    357     int32_t countUniqueFirstChars() const { return set.size(); }
    358     int32_t getIndex() const { return num; }
    359 
    360 private:
    361     UnicodeString s;
    362     UnicodeSet set;
    363     int32_t value;
    364     int32_t num;
    365 };
    366 
    367 }  // end namespace
    368 
    369 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
    370     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
    371     Generator gen;
    372     builder_->clear();
    373     while(gen.countUniqueFirstChars()<numUniqueFirst) {
    374         builder_->add(gen.getString(), gen.getValue(), errorCode);
    375         gen.next();
    376     }
    377     logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
    378     UnicodeString trieUChars;
    379     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    380     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    381     return new UCharsTrie(trieUChars.getBuffer());
    382 }
    383 
    384 // Exercise a large branch node.
    385 void UCharsTrieTest::TestLargeTrie() {
    386     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
    387     if(trie.isNull()) {
    388         return;  // buildTrie() reported an error
    389     }
    390     Generator gen;
    391     while(gen.countUniqueFirstChars()<1111) {
    392         UnicodeString x(gen.getString());
    393         int32_t value=gen.getValue();
    394         if(!x.isEmpty()) {
    395             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
    396                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
    397                       x[0], (long)gen.getIndex());
    398                 break;
    399             }
    400             x.remove(0, 1);
    401         }
    402         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
    403         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
    404             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
    405                   "next()!=current() or getValue() wrong "
    406                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
    407             break;
    408         }
    409         gen.next();
    410     }
    411 }
    412 
    413 enum {
    414     u_a=0x61,
    415     u_b=0x62,
    416     u_c=0x63,
    417     u_j=0x6a,
    418     u_n=0x6e,
    419     u_r=0x72,
    420     u_u=0x75,
    421     u_y=0x79
    422 };
    423 
    424 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    425     // All types of nodes leading to the same value,
    426     // for code coverage of recursive functions.
    427     // In particular, we need a lot of branches on some single level
    428     // to exercise a split-branch node.
    429     static const StringAndValue data[]={
    430         { "august", 8 },
    431         { "jan", 1 },
    432         { "jan.", 1 },
    433         { "jana", 1 },
    434         { "janbb", 1 },
    435         { "janc", 1 },
    436         { "janddd", 1 },
    437         { "janee", 1 },
    438         { "janef", 1 },
    439         { "janf", 1 },
    440         { "jangg", 1 },
    441         { "janh", 1 },
    442         { "janiiii", 1 },
    443         { "janj", 1 },
    444         { "jankk", 1 },
    445         { "jankl", 1 },
    446         { "jankmm", 1 },
    447         { "janl", 1 },
    448         { "janm", 1 },
    449         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    450         { "jano", 1 },
    451         { "janpp", 1 },
    452         { "janqqq", 1 },
    453         { "janr", 1 },
    454         { "januar", 1 },
    455         { "january", 1 },
    456         { "july", 7 },
    457         { "jun", 6 },
    458         { "jun.", 6 },
    459         { "june", 6 }
    460     };
    461     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
    462 }
    463 
    464 void UCharsTrieTest::TestHasUniqueValue() {
    465     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    466     if(trie.isNull()) {
    467         return;  // buildTrie() reported an error
    468     }
    469     int32_t uniqueValue;
    470     if(trie->hasUniqueValue(uniqueValue)) {
    471         errln("unique value at root");
    472     }
    473     trie->next(u_j);
    474     trie->next(u_a);
    475     trie->next(u_n);
    476     // hasUniqueValue() directly after next()
    477     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    478         errln("not unique value 1 after \"jan\"");
    479     }
    480     trie->first(u_j);
    481     trie->next(u_u);
    482     if(trie->hasUniqueValue(uniqueValue)) {
    483         errln("unique value after \"ju\"");
    484     }
    485     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    486         errln("not normal value 6 after \"jun\"");
    487     }
    488     // hasUniqueValue() after getValue()
    489     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    490         errln("not unique value 6 after \"jun\"");
    491     }
    492     // hasUniqueValue() from within a linear-match node
    493     trie->first(u_a);
    494     trie->next(u_u);
    495     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    496         errln("not unique value 8 after \"au\"");
    497     }
    498 }
    499 
    500 void UCharsTrieTest::TestGetNextUChars() {
    501     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    502     if(trie.isNull()) {
    503         return;  // buildTrie() reported an error
    504     }
    505     UnicodeString buffer;
    506     UnicodeStringAppendable app(buffer);
    507     int32_t count=trie->getNextUChars(app);
    508     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
    509         errln("months getNextUChars()!=[aj] at root");
    510     }
    511     trie->next(u_j);
    512     trie->next(u_a);
    513     trie->next(u_n);
    514     // getNextUChars() directly after next()
    515     buffer.remove();
    516     count=trie->getNextUChars(app);
    517     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    518         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
    519     }
    520     // getNextUChars() after getValue()
    521     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    522     buffer.remove();
    523     count=trie->getNextUChars(app);
    524     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    525         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    526     }
    527     // getNextUChars() from a linear-match node
    528     trie->next(u_u);
    529     buffer.remove();
    530     count=trie->getNextUChars(app);
    531     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
    532         errln("months getNextUChars()!=[a] after \"janu\"");
    533     }
    534     trie->next(u_a);
    535     buffer.remove();
    536     count=trie->getNextUChars(app);
    537     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
    538         errln("months getNextUChars()!=[r] after \"janua\"");
    539     }
    540     trie->next(u_r);
    541     trie->next(u_y);
    542     // getNextUChars() after a final match
    543     buffer.remove();
    544     count=trie->getNextUChars(app);
    545     if(count!=0 || buffer.length()!=0) {
    546         errln("months getNextUChars()!=[] after \"january\"");
    547     }
    548 }
    549 
    550 void UCharsTrieTest::TestIteratorFromBranch() {
    551     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    552     if(trie.isNull()) {
    553         return;  // buildTrie() reported an error
    554     }
    555     // Go to a branch node.
    556     trie->next(u_j);
    557     trie->next(u_a);
    558     trie->next(u_n);
    559     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    560     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    561     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    562         return;
    563     }
    564     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    565     // following "jan".
    566     static const StringAndValue data[]={
    567         { "", 1 },
    568         { ".", 1 },
    569         { "a", 1 },
    570         { "bb", 1 },
    571         { "c", 1 },
    572         { "ddd", 1 },
    573         { "ee", 1 },
    574         { "ef", 1 },
    575         { "f", 1 },
    576         { "gg", 1 },
    577         { "h", 1 },
    578         { "iiii", 1 },
    579         { "j", 1 },
    580         { "kk", 1 },
    581         { "kl", 1 },
    582         { "kmm", 1 },
    583         { "l", 1 },
    584         { "m", 1 },
    585         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    586         { "o", 1 },
    587         { "pp", 1 },
    588         { "qqq", 1 },
    589         { "r", 1 },
    590         { "uar", 1 },
    591         { "uary", 1 }
    592     };
    593     checkIterator(iter, data, UPRV_LENGTHOF(data));
    594     // Reset, and we should get the same result.
    595     logln("after iter.reset()");
    596     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    597 }
    598 
    599 void UCharsTrieTest::TestIteratorFromLinearMatch() {
    600     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    601     if(trie.isNull()) {
    602         return;  // buildTrie() reported an error
    603     }
    604     // Go into a linear-match node.
    605     trie->next(u_j);
    606     trie->next(u_a);
    607     trie->next(u_n);
    608     trie->next(u_u);
    609     trie->next(u_a);
    610     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    611     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    612     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    613         return;
    614     }
    615     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    616     // following "janua".
    617     static const StringAndValue data[]={
    618         { "r", 1 },
    619         { "ry", 1 }
    620     };
    621     checkIterator(iter, data, UPRV_LENGTHOF(data));
    622     // Reset, and we should get the same result.
    623     logln("after iter.reset()");
    624     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    625 }
    626 
    627 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
    628     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    629     if(trie.isNull()) {
    630         return;  // buildTrie() reported an error
    631     }
    632     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    633     UCharsTrie::Iterator iter(*trie, 4, errorCode);
    634     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    635         return;
    636     }
    637     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    638     // of each string, and no string duplicates from the truncation.
    639     static const StringAndValue data[]={
    640         { "augu", -1 },
    641         { "jan", 1 },
    642         { "jan.", 1 },
    643         { "jana", 1 },
    644         { "janb", -1 },
    645         { "janc", 1 },
    646         { "jand", -1 },
    647         { "jane", -1 },
    648         { "janf", 1 },
    649         { "jang", -1 },
    650         { "janh", 1 },
    651         { "jani", -1 },
    652         { "janj", 1 },
    653         { "jank", -1 },
    654         { "janl", 1 },
    655         { "janm", 1 },
    656         { "jann", -1 },
    657         { "jano", 1 },
    658         { "janp", -1 },
    659         { "janq", -1 },
    660         { "janr", 1 },
    661         { "janu", -1 },
    662         { "july", 7 },
    663         { "jun", 6 },
    664         { "jun.", 6 },
    665         { "june", 6 }
    666     };
    667     checkIterator(iter, data, UPRV_LENGTHOF(data));
    668     // Reset, and we should get the same result.
    669     logln("after iter.reset()");
    670     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    671 }
    672 
    673 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    674     static const StringAndValue data[]={
    675         { "abcdef", 10 },
    676         { "abcdepq", 200 },
    677         { "abcdeyz", 3000 }
    678     };
    679     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    680     if(trie.isNull()) {
    681         return;  // buildTrie() reported an error
    682     }
    683     // Go into a linear-match node.
    684     trie->next(u_a);
    685     trie->next(u_b);
    686     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    687     // Truncate within the linear-match node.
    688     UCharsTrie::Iterator iter(*trie, 2, errorCode);
    689     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    690         return;
    691     }
    692     static const StringAndValue expected[]={
    693         { "cd", -1 }
    694     };
    695     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    696     // Reset, and we should get the same result.
    697     logln("after iter.reset()");
    698     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    699 }
    700 
    701 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    702     static const StringAndValue data[]={
    703         { "abcdef", 10 },
    704         { "abcdepq", 200 },
    705         { "abcdeyz", 3000 }
    706     };
    707     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    708     if(trie.isNull()) {
    709         return;  // buildTrie() reported an error
    710     }
    711     // Go into a linear-match node.
    712     trie->next(u_a);
    713     trie->next(u_b);
    714     trie->next(u_c);
    715     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    716     // Truncate after the linear-match node.
    717     UCharsTrie::Iterator iter(*trie, 3, errorCode);
    718     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    719         return;
    720     }
    721     static const StringAndValue expected[]={
    722         { "def", 10 },
    723         { "dep", -1 },
    724         { "dey", -1 }
    725     };
    726     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    727     // Reset, and we should get the same result.
    728     logln("after iter.reset()");
    729     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    730 }
    731 
    732 void UCharsTrieTest::TestIteratorFromUChars() {
    733     static const StringAndValue data[]={
    734         { "mm", 3 },
    735         { "mmm", 33 },
    736         { "mmnop", 333 }
    737     };
    738     builder_->clear();
    739     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
    740     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
    741         builder_->add(data[i].s, data[i].value, errorCode);
    742     }
    743     UnicodeString trieUChars;
    744     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    745     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
    746     checkIterator(iter, data, UPRV_LENGTHOF(data));
    747 }
    748 
    749 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    750     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    751     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    752     logln("checkData(dataLength=%d, small)", (int)dataLength);
    753     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    754 }
    755 
    756 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    757     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
    758     if(trie.isNull()) {
    759         return;  // buildTrie() reported an error
    760     }
    761     checkFirst(*trie, data, dataLength);
    762     checkNext(*trie, data, dataLength);
    763     checkNextWithState(*trie, data, dataLength);
    764     checkNextString(*trie, data, dataLength);
    765     checkIterator(*trie, data, dataLength);
    766 }
    767 
    768 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    769                                       UStringTrieBuildOption buildOption) {
    770     IcuTestErrorCode errorCode(*this, "buildTrie()");
    771     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    772     int32_t index, step;
    773     if(dataLength&1) {
    774         // Odd number of items.
    775         index=dataLength/2;
    776         step=2;
    777     } else if((dataLength%3)!=0) {
    778         // Not a multiple of 3.
    779         index=dataLength/5;
    780         step=3;
    781     } else {
    782         index=dataLength-1;
    783         step=-1;
    784     }
    785     builder_->clear();
    786     for(int32_t i=0; i<dataLength; ++i) {
    787         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
    788                       data[index].value, errorCode);
    789         index=(index+step)%dataLength;
    790     }
    791     UnicodeString trieUChars;
    792     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
    793     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
    794     if(!errorCode.logIfFailureAndReset("add()/build()")) {
    795         builder_->add("zzz", 999, errorCode);
    796         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    797             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    798         }
    799     }
    800     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    801     UnicodeString trieUChars2;
    802     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
    803     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
    804         errln("builder.buildUnicodeString() before & after build() returned same array");
    805     }
    806     if(errorCode.isFailure()) {
    807         return NULL;
    808     }
    809     // Tries from either build() method should be identical but
    810     // UCharsTrie does not implement equals().
    811     // We just return either one.
    812     if((dataLength&1)!=0) {
    813         return trie.orphan();
    814     } else {
    815         return new UCharsTrie(trieUChars2.getBuffer());
    816     }
    817 }
    818 
    819 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
    820                                 const StringAndValue data[], int32_t dataLength) {
    821     for(int32_t i=0; i<dataLength; ++i) {
    822         if(*data[i].s==0) {
    823             continue;  // skip empty string
    824         }
    825         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    826         UChar32 c=expectedString[0];
    827         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
    828         UStringTrieResult firstResult=trie.first(c);
    829         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    830         UStringTrieResult nextResult=trie.next(nextCp);
    831         if(firstResult!=trie.reset().next(c) ||
    832            firstResult!=trie.current() ||
    833            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    834            nextResult!=trie.next(nextCp)
    835         ) {
    836             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
    837                   c, data[i].s);
    838         }
    839         c=expectedString.char32At(0);
    840         int32_t cLength=U16_LENGTH(c);
    841         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
    842         firstResult=trie.firstForCodePoint(c);
    843         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    844         nextResult=trie.nextForCodePoint(nextCp);
    845         if(firstResult!=trie.reset().nextForCodePoint(c) ||
    846            firstResult!=trie.current() ||
    847            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    848            nextResult!=trie.nextForCodePoint(nextCp)
    849         ) {
    850             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
    851                   c, data[i].s);
    852         }
    853     }
    854     trie.reset();
    855 }
    856 
    857 void UCharsTrieTest::checkNext(UCharsTrie &trie,
    858                                const StringAndValue data[], int32_t dataLength) {
    859     UCharsTrie::State state;
    860     for(int32_t i=0; i<dataLength; ++i) {
    861         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    862         int32_t stringLength= (i&1) ? -1 : expectedString.length();
    863         UStringTrieResult result;
    864         if( !USTRINGTRIE_HAS_VALUE(
    865                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
    866             result!=trie.current()
    867         ) {
    868             errln("trie does not seem to contain %s", data[i].s);
    869         } else if(trie.getValue()!=data[i].value) {
    870             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    871                   data[i].s,
    872                   (long)trie.getValue(), (long)trie.getValue(),
    873                   (long)data[i].value, (long)data[i].value);
    874         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    875             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    876         }
    877         trie.reset();
    878         stringLength=expectedString.length();
    879         result=trie.current();
    880         for(int32_t j=0; j<stringLength; ++j) {
    881             if(!USTRINGTRIE_HAS_NEXT(result)) {
    882                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    883                 break;
    884             }
    885             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    886                 trie.getValue();
    887                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    888                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    889                     break;
    890                 }
    891             }
    892             result=trie.next(expectedString[j]);
    893             if(!USTRINGTRIE_MATCHES(result)) {
    894                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    895                 break;
    896             }
    897             if(result!=trie.current()) {
    898                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    899                 break;
    900             }
    901         }
    902         if(!USTRINGTRIE_HAS_VALUE(result)) {
    903             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    904             continue;
    905         }
    906         trie.getValue();
    907         if(result!=trie.current()) {
    908             errln("trie.current() != current()+getValue()+current() after end of %s",
    909                   data[i].s);
    910         }
    911         // Compare the final current() with whether next() can actually continue.
    912         trie.saveState(state);
    913         UBool nextContinues=FALSE;
    914         for(int32_t c=0x20; c<0xe000; ++c) {
    915             if(c==0x80) {
    916                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
    917             }
    918             if(trie.resetToState(state).next(c)) {
    919                 nextContinues=TRUE;
    920                 break;
    921             }
    922         }
    923         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    924             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    925                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    926         }
    927         trie.reset();
    928     }
    929 }
    930 
    931 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
    932                                         const StringAndValue data[], int32_t dataLength) {
    933     UCharsTrie::State noState, state;
    934     for(int32_t i=0; i<dataLength; ++i) {
    935         if((i&1)==0) {
    936             // This should have no effect.
    937             trie.resetToState(noState);
    938         }
    939         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    940         int32_t stringLength=expectedString.length();
    941         int32_t partialLength=stringLength/3;
    942         for(int32_t j=0; j<partialLength; ++j) {
    943             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    944                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    945                 return;
    946             }
    947         }
    948         trie.saveState(state);
    949         UStringTrieResult resultAtState=trie.current();
    950         UStringTrieResult result;
    951         int32_t valueAtState=-99;
    952         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    953             valueAtState=trie.getValue();
    954         }
    955         result=trie.next(0);  // mismatch
    956         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    957             errln("trie.next(0) matched after part of %s", data[i].s);
    958         }
    959         if( resultAtState!=trie.resetToState(state).current() ||
    960             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    961         ) {
    962             errln("trie.next(part of %s) changes current()/getValue() after "
    963                   "saveState/next(0)/resetToState",
    964                   data[i].s);
    965         } else if(!USTRINGTRIE_HAS_VALUE(
    966                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
    967                                        stringLength-partialLength)) ||
    968                   result!=trie.current()) {
    969             errln("trie.next(rest of %s) does not seem to contain %s after "
    970                   "saveState/next(0)/resetToState",
    971                   data[i].s, data[i].s);
    972         } else if(!USTRINGTRIE_HAS_VALUE(
    973                       result=trie.resetToState(state).
    974                                   next(expectedString.getTerminatedBuffer()+partialLength,
    975                                        stringLength-partialLength)) ||
    976                   result!=trie.current()) {
    977             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    978                   data[i].s);
    979         } else if(trie.getValue()!=data[i].value) {
    980             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    981                   data[i].s,
    982                   (long)trie.getValue(), (long)trie.getValue(),
    983                   (long)data[i].value, (long)data[i].value);
    984         }
    985         trie.reset();
    986     }
    987 }
    988 
    989 // next(string) is also tested in other functions,
    990 // but here we try to go partway through the string, and then beyond it.
    991 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
    992                                      const StringAndValue data[], int32_t dataLength) {
    993     for(int32_t i=0; i<dataLength; ++i) {
    994         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    995         int32_t stringLength=expectedString.length();
    996         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
    997             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    998             continue;
    999         }
   1000         // Test that we stop properly at the end of the string.
   1001         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
   1002                      stringLength+1-stringLength/2)) {
   1003             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
   1004         }
   1005         trie.reset();
   1006     }
   1007 }
   1008 
   1009 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
   1010                                    const StringAndValue data[], int32_t dataLength) {
   1011     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1012     UCharsTrie::Iterator iter(trie, 0, errorCode);
   1013     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
   1014         return;
   1015     }
   1016     checkIterator(iter, data, dataLength);
   1017 }
   1018 
   1019 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
   1020                                    const StringAndValue data[], int32_t dataLength) {
   1021     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1022     for(int32_t i=0; i<dataLength; ++i) {
   1023         if(!iter.hasNext()) {
   1024             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
   1025             break;
   1026         }
   1027         UBool hasNext=iter.next(errorCode);
   1028         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
   1029             break;
   1030         }
   1031         if(!hasNext) {
   1032             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
   1033             break;
   1034         }
   1035         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
   1036         if(iter.getString()!=expectedString) {
   1037             char buffer[1000];
   1038             UnicodeString invString(prettify(iter.getString()));
   1039             invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
   1040             errln("trie iterator next().getString()=%s but expected %s for item %d",
   1041                   buffer, data[i].s, (int)i);
   1042         }
   1043         if(iter.getValue()!=data[i].value) {
   1044             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
   1045                   (long)iter.getValue(), (long)iter.getValue(),
   1046                   (long)data[i].value, (long)data[i].value,
   1047                   (int)i, data[i].s);
   1048         }
   1049     }
   1050     if(iter.hasNext()) {
   1051         errln("trie iterator hasNext()=TRUE after all items");
   1052     }
   1053     UBool hasNext=iter.next(errorCode);
   1054     errorCode.logIfFailureAndReset("trie iterator next() after all items");
   1055     if(hasNext) {
   1056         errln("trie iterator next()=TRUE after all items");
   1057     }
   1058 }
   1059