Home | History | Annotate | Download | only in intltest
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  ucharstrietest.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010nov16
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include <string.h>
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/appendable.h"
     19 #include "unicode/localpointer.h"
     20 #include "unicode/ucharstrie.h"
     21 #include "unicode/ucharstriebuilder.h"
     22 #include "unicode/uniset.h"
     23 #include "unicode/unistr.h"
     24 #include "intltest.h"
     25 
     26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     27 
     28 struct StringAndValue {
     29     const char *s;
     30     int32_t value;
     31 };
     32 
     33 class UCharsTrieTest : public IntlTest {
     34 public:
     35     UCharsTrieTest();
     36     virtual ~UCharsTrieTest();
     37 
     38     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     39     void TestBuilder();
     40     void TestEmpty();
     41     void Test_a();
     42     void Test_a_ab();
     43     void TestShortestBranch();
     44     void TestBranches();
     45     void TestLongSequence();
     46     void TestLongBranch();
     47     void TestValuesForState();
     48     void TestCompact();
     49     void TestFirstForCodePoint();
     50     void TestNextForCodePoint();
     51 
     52     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
     53     void TestLargeTrie();
     54 
     55     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     56     void TestHasUniqueValue();
     57     void TestGetNextUChars();
     58     void TestIteratorFromBranch();
     59     void TestIteratorFromLinearMatch();
     60     void TestTruncatingIteratorFromRoot();
     61     void TestTruncatingIteratorFromLinearMatchShort();
     62     void TestTruncatingIteratorFromLinearMatchLong();
     63     void TestIteratorFromUChars();
     64 
     65     void checkData(const StringAndValue data[], int32_t dataLength);
     66     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     67     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     68                           UStringTrieBuildOption buildOption);
     69     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     70     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     71     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     72     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     73     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     74     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     75 
     76 private:
     77     UCharsTrieBuilder *builder_;
     78 };
     79 
     80 extern IntlTest *createUCharsTrieTest() {
     81     return new UCharsTrieTest();
     82 }
     83 
     84 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
     85     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
     86     builder_=new UCharsTrieBuilder(errorCode);
     87 }
     88 
     89 UCharsTrieTest::~UCharsTrieTest() {
     90     delete builder_;
     91 }
     92 
     93 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     94     if(exec) {
     95         logln("TestSuite UCharsTrieTest: ");
     96     }
     97     TESTCASE_AUTO_BEGIN;
     98     TESTCASE_AUTO(TestBuilder);
     99     TESTCASE_AUTO(TestEmpty);
    100     TESTCASE_AUTO(Test_a);
    101     TESTCASE_AUTO(Test_a_ab);
    102     TESTCASE_AUTO(TestShortestBranch);
    103     TESTCASE_AUTO(TestBranches);
    104     TESTCASE_AUTO(TestLongSequence);
    105     TESTCASE_AUTO(TestLongBranch);
    106     TESTCASE_AUTO(TestValuesForState);
    107     TESTCASE_AUTO(TestCompact);
    108     TESTCASE_AUTO(TestFirstForCodePoint);
    109     TESTCASE_AUTO(TestNextForCodePoint);
    110     TESTCASE_AUTO(TestLargeTrie);
    111     TESTCASE_AUTO(TestHasUniqueValue);
    112     TESTCASE_AUTO(TestGetNextUChars);
    113     TESTCASE_AUTO(TestIteratorFromBranch);
    114     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    115     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    116     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    117     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    118     TESTCASE_AUTO(TestIteratorFromUChars);
    119     TESTCASE_AUTO_END;
    120 }
    121 
    122 void UCharsTrieTest::TestBuilder() {
    123     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    124     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    125     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    126         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    127         return;
    128     }
    129     // TODO: remove .build(...) once add() checks for duplicates.
    130     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    131     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    132         errln("UCharsTrieBuilder.add() did not detect duplicates");
    133         return;
    134     }
    135 }
    136 
    137 void UCharsTrieTest::TestEmpty() {
    138     static const StringAndValue data[]={
    139         { "", 0 }
    140     };
    141     checkData(data, LENGTHOF(data));
    142 }
    143 
    144 void UCharsTrieTest::Test_a() {
    145     static const StringAndValue data[]={
    146         { "a", 1 }
    147     };
    148     checkData(data, LENGTHOF(data));
    149 }
    150 
    151 void UCharsTrieTest::Test_a_ab() {
    152     static const StringAndValue data[]={
    153         { "a", 1 },
    154         { "ab", 100 }
    155     };
    156     checkData(data, LENGTHOF(data));
    157 }
    158 
    159 void UCharsTrieTest::TestShortestBranch() {
    160     static const StringAndValue data[]={
    161         { "a", 1000 },
    162         { "b", 2000 }
    163     };
    164     checkData(data, LENGTHOF(data));
    165 }
    166 
    167 void UCharsTrieTest::TestBranches() {
    168     static const StringAndValue data[]={
    169         { "a", 0x10 },
    170         { "cc", 0x40 },
    171         { "e", 0x100 },
    172         { "ggg", 0x400 },
    173         { "i", 0x1000 },
    174         { "kkkk", 0x4000 },
    175         { "n", 0x10000 },
    176         { "ppppp", 0x40000 },
    177         { "r", 0x100000 },
    178         { "sss", 0x200000 },
    179         { "t", 0x400000 },
    180         { "uu", 0x800000 },
    181         { "vv", 0x7fffffff },
    182         { "zz", (int32_t)0x80000000 }
    183     };
    184     for(int32_t length=2; length<=LENGTHOF(data); ++length) {
    185         infoln("TestBranches length=%d", (int)length);
    186         checkData(data, length);
    187     }
    188 }
    189 
    190 void UCharsTrieTest::TestLongSequence() {
    191     static const StringAndValue data[]={
    192         { "a", -1 },
    193         // sequence of linear-match nodes
    194         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    195         // more than 256 units
    196         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    197           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    198           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    201           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    202     };
    203     checkData(data, LENGTHOF(data));
    204 }
    205 
    206 void UCharsTrieTest::TestLongBranch() {
    207     // Split-branch and interesting compact-integer values.
    208     static const StringAndValue data[]={
    209         { "a", -2 },
    210         { "b", -1 },
    211         { "c", 0 },
    212         { "d2", 1 },
    213         { "f", 0x3f },
    214         { "g", 0x40 },
    215         { "h", 0x41 },
    216         { "j23", 0x1900 },
    217         { "j24", 0x19ff },
    218         { "j25", 0x1a00 },
    219         { "k2", 0x1a80 },
    220         { "k3", 0x1aff },
    221         { "l234567890", 0x1b00 },
    222         { "l234567890123", 0x1b01 },
    223         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    224         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    225         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    226         { "r", 0x333333 },
    227         { "s2345", 0x4444444 },
    228         { "t234567890", 0x77777777 },
    229         { "z", (int32_t)0x80000001 }
    230     };
    231     checkData(data, LENGTHOF(data));
    232 }
    233 
    234 void UCharsTrieTest::TestValuesForState() {
    235     // Check that saveState() and resetToState() interact properly
    236     // with next() and current().
    237     static const StringAndValue data[]={
    238         { "a", -1 },
    239         { "ab", -2 },
    240         { "abc", -3 },
    241         { "abcd", -4 },
    242         { "abcde", -5 },
    243         { "abcdef", -6 }
    244     };
    245     checkData(data, LENGTHOF(data));
    246 }
    247 
    248 void UCharsTrieTest::TestCompact() {
    249     // Duplicate trailing strings and values provide opportunities for compacting.
    250     static const StringAndValue data[]={
    251         { "+", 0 },
    252         { "+august", 8 },
    253         { "+december", 12 },
    254         { "+july", 7 },
    255         { "+june", 6 },
    256         { "+november", 11 },
    257         { "+october", 10 },
    258         { "+september", 9 },
    259         { "-", 0 },
    260         { "-august", 8 },
    261         { "-december", 12 },
    262         { "-july", 7 },
    263         { "-june", 6 },
    264         { "-november", 11 },
    265         { "-october", 10 },
    266         { "-september", 9 },
    267         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    268         // both times because each time it follows a different linear-match node.
    269         { "xjuly", 7 },
    270         { "xjune", 6 }
    271     };
    272     checkData(data, LENGTHOF(data));
    273 }
    274 
    275 void UCharsTrieTest::TestFirstForCodePoint() {
    276     static const StringAndValue data[]={
    277         { "a", 1 },
    278         { "a\\ud800", 2 },
    279         { "a\\U00010000", 3 },
    280         { "\\ud840", 4 },
    281         { "\\U00020000\\udbff", 5 },
    282         { "\\U00020000\\U0010ffff", 6 },
    283         { "\\U00020000\\U0010ffffz", 7 },
    284         { "\\U00050000xy", 8 },
    285         { "\\U00050000xyz", 9 }
    286     };
    287     checkData(data, LENGTHOF(data));
    288 }
    289 
    290 void UCharsTrieTest::TestNextForCodePoint() {
    291     static const StringAndValue data[]={
    292         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
    293         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
    294         { "\\u4dff\\U000103ff", 99999 }
    295     };
    296     LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    297     if(trie.isNull()) {
    298         return;  // buildTrie() reported an error
    299     }
    300     UStringTrieResult result;
    301     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    302         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    303         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    304         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    305         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    306         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    307         trie->getValue()!=2000000000
    308     ) {
    309         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
    310     }
    311     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    312         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    313         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    314         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    315         trie->getValue()!=44444
    316     ) {
    317         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
    318     }
    319     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    320         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    321         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    322         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
    323     ) {
    324         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
    325     }
    326     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    327         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    328         trie->getValue()!=99999
    329     ) {
    330         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
    331     }
    332 }
    333 
    334 // Definitions in the anonymous namespace are invisible outside this file.
    335 namespace {
    336 
    337 // Generate (string, value) pairs.
    338 // The first string (before next()) will be empty.
    339 class Generator {
    340 public:
    341     Generator() : value(4711), num(0) {}
    342     void next() {
    343         UChar c;
    344         s.truncate(0);
    345         s.append(c=(UChar)(value>>16));
    346         s.append((UChar)(value>>4));
    347         if(value&1) {
    348             s.append((UChar)value);
    349         }
    350         set.add(c);
    351         value+=((value>>5)&0x7ff)*3+1;
    352         ++num;
    353     }
    354     const UnicodeString &getString() const { return s; }
    355     int32_t getValue() const { return value; }
    356     int32_t countUniqueFirstChars() const { return set.size(); }
    357     int32_t getIndex() const { return num; }
    358 
    359 private:
    360     UnicodeString s;
    361     UnicodeSet set;
    362     int32_t value;
    363     int32_t num;
    364 };
    365 
    366 }  // end namespace
    367 
    368 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
    369     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
    370     Generator gen;
    371     builder_->clear();
    372     while(gen.countUniqueFirstChars()<numUniqueFirst) {
    373         builder_->add(gen.getString(), gen.getValue(), errorCode);
    374         gen.next();
    375     }
    376     infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
    377     UnicodeString trieUChars;
    378     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    379     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    380     return new UCharsTrie(trieUChars.getBuffer());
    381 }
    382 
    383 // Exercise a large branch node.
    384 void UCharsTrieTest::TestLargeTrie() {
    385     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
    386     if(trie.isNull()) {
    387         return;  // buildTrie() reported an error
    388     }
    389     Generator gen;
    390     while(gen.countUniqueFirstChars()<1111) {
    391         UnicodeString x(gen.getString());
    392         int32_t value=gen.getValue();
    393         if(!x.isEmpty()) {
    394             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
    395                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
    396                       x[0], (long)gen.getIndex());
    397                 break;
    398             }
    399             x.remove(0, 1);
    400         }
    401         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
    402         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
    403             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
    404                   "next()!=current() or getValue() wrong "
    405                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
    406             break;
    407         }
    408         gen.next();
    409     }
    410 }
    411 
    412 enum {
    413     u_a=0x61,
    414     u_b=0x62,
    415     u_c=0x63,
    416     u_j=0x6a,
    417     u_n=0x6e,
    418     u_r=0x72,
    419     u_u=0x75,
    420     u_y=0x79
    421 };
    422 
    423 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    424     // All types of nodes leading to the same value,
    425     // for code coverage of recursive functions.
    426     // In particular, we need a lot of branches on some single level
    427     // to exercise a split-branch node.
    428     static const StringAndValue data[]={
    429         { "august", 8 },
    430         { "jan", 1 },
    431         { "jan.", 1 },
    432         { "jana", 1 },
    433         { "janbb", 1 },
    434         { "janc", 1 },
    435         { "janddd", 1 },
    436         { "janee", 1 },
    437         { "janef", 1 },
    438         { "janf", 1 },
    439         { "jangg", 1 },
    440         { "janh", 1 },
    441         { "janiiii", 1 },
    442         { "janj", 1 },
    443         { "jankk", 1 },
    444         { "jankl", 1 },
    445         { "jankmm", 1 },
    446         { "janl", 1 },
    447         { "janm", 1 },
    448         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    449         { "jano", 1 },
    450         { "janpp", 1 },
    451         { "janqqq", 1 },
    452         { "janr", 1 },
    453         { "januar", 1 },
    454         { "january", 1 },
    455         { "july", 7 },
    456         { "jun", 6 },
    457         { "jun.", 6 },
    458         { "june", 6 }
    459     };
    460     return buildTrie(data, LENGTHOF(data), buildOption);
    461 }
    462 
    463 void UCharsTrieTest::TestHasUniqueValue() {
    464     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    465     if(trie.isNull()) {
    466         return;  // buildTrie() reported an error
    467     }
    468     int32_t uniqueValue;
    469     if(trie->hasUniqueValue(uniqueValue)) {
    470         errln("unique value at root");
    471     }
    472     trie->next(u_j);
    473     trie->next(u_a);
    474     trie->next(u_n);
    475     // hasUniqueValue() directly after next()
    476     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    477         errln("not unique value 1 after \"jan\"");
    478     }
    479     trie->first(u_j);
    480     trie->next(u_u);
    481     if(trie->hasUniqueValue(uniqueValue)) {
    482         errln("unique value after \"ju\"");
    483     }
    484     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    485         errln("not normal value 6 after \"jun\"");
    486     }
    487     // hasUniqueValue() after getValue()
    488     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    489         errln("not unique value 6 after \"jun\"");
    490     }
    491     // hasUniqueValue() from within a linear-match node
    492     trie->first(u_a);
    493     trie->next(u_u);
    494     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    495         errln("not unique value 8 after \"au\"");
    496     }
    497 }
    498 
    499 void UCharsTrieTest::TestGetNextUChars() {
    500     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    501     if(trie.isNull()) {
    502         return;  // buildTrie() reported an error
    503     }
    504     UnicodeString buffer;
    505     UnicodeStringAppendable app(buffer);
    506     int32_t count=trie->getNextUChars(app);
    507     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
    508         errln("months getNextUChars()!=[aj] at root");
    509     }
    510     trie->next(u_j);
    511     trie->next(u_a);
    512     trie->next(u_n);
    513     // getNextUChars() directly after next()
    514     buffer.remove();
    515     count=trie->getNextUChars(app);
    516     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    517         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
    518     }
    519     // getNextUChars() after getValue()
    520     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    521     buffer.remove();
    522     count=trie->getNextUChars(app);
    523     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    524         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    525     }
    526     // getNextUChars() from a linear-match node
    527     trie->next(u_u);
    528     buffer.remove();
    529     count=trie->getNextUChars(app);
    530     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
    531         errln("months getNextUChars()!=[a] after \"janu\"");
    532     }
    533     trie->next(u_a);
    534     buffer.remove();
    535     count=trie->getNextUChars(app);
    536     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
    537         errln("months getNextUChars()!=[r] after \"janua\"");
    538     }
    539     trie->next(u_r);
    540     trie->next(u_y);
    541     // getNextUChars() after a final match
    542     buffer.remove();
    543     count=trie->getNextUChars(app);
    544     if(count!=0 || buffer.length()!=0) {
    545         errln("months getNextUChars()!=[] after \"january\"");
    546     }
    547 }
    548 
    549 void UCharsTrieTest::TestIteratorFromBranch() {
    550     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    551     if(trie.isNull()) {
    552         return;  // buildTrie() reported an error
    553     }
    554     // Go to a branch node.
    555     trie->next(u_j);
    556     trie->next(u_a);
    557     trie->next(u_n);
    558     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    559     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    560     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    561         return;
    562     }
    563     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    564     // following "jan".
    565     static const StringAndValue data[]={
    566         { "", 1 },
    567         { ".", 1 },
    568         { "a", 1 },
    569         { "bb", 1 },
    570         { "c", 1 },
    571         { "ddd", 1 },
    572         { "ee", 1 },
    573         { "ef", 1 },
    574         { "f", 1 },
    575         { "gg", 1 },
    576         { "h", 1 },
    577         { "iiii", 1 },
    578         { "j", 1 },
    579         { "kk", 1 },
    580         { "kl", 1 },
    581         { "kmm", 1 },
    582         { "l", 1 },
    583         { "m", 1 },
    584         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    585         { "o", 1 },
    586         { "pp", 1 },
    587         { "qqq", 1 },
    588         { "r", 1 },
    589         { "uar", 1 },
    590         { "uary", 1 }
    591     };
    592     checkIterator(iter, data, LENGTHOF(data));
    593     // Reset, and we should get the same result.
    594     logln("after iter.reset()");
    595     checkIterator(iter.reset(), data, LENGTHOF(data));
    596 }
    597 
    598 void UCharsTrieTest::TestIteratorFromLinearMatch() {
    599     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    600     if(trie.isNull()) {
    601         return;  // buildTrie() reported an error
    602     }
    603     // Go into a linear-match node.
    604     trie->next(u_j);
    605     trie->next(u_a);
    606     trie->next(u_n);
    607     trie->next(u_u);
    608     trie->next(u_a);
    609     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    610     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    611     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    612         return;
    613     }
    614     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    615     // following "janua".
    616     static const StringAndValue data[]={
    617         { "r", 1 },
    618         { "ry", 1 }
    619     };
    620     checkIterator(iter, data, LENGTHOF(data));
    621     // Reset, and we should get the same result.
    622     logln("after iter.reset()");
    623     checkIterator(iter.reset(), data, LENGTHOF(data));
    624 }
    625 
    626 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
    627     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    628     if(trie.isNull()) {
    629         return;  // buildTrie() reported an error
    630     }
    631     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    632     UCharsTrie::Iterator iter(*trie, 4, errorCode);
    633     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    634         return;
    635     }
    636     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    637     // of each string, and no string duplicates from the truncation.
    638     static const StringAndValue data[]={
    639         { "augu", -1 },
    640         { "jan", 1 },
    641         { "jan.", 1 },
    642         { "jana", 1 },
    643         { "janb", -1 },
    644         { "janc", 1 },
    645         { "jand", -1 },
    646         { "jane", -1 },
    647         { "janf", 1 },
    648         { "jang", -1 },
    649         { "janh", 1 },
    650         { "jani", -1 },
    651         { "janj", 1 },
    652         { "jank", -1 },
    653         { "janl", 1 },
    654         { "janm", 1 },
    655         { "jann", -1 },
    656         { "jano", 1 },
    657         { "janp", -1 },
    658         { "janq", -1 },
    659         { "janr", 1 },
    660         { "janu", -1 },
    661         { "july", 7 },
    662         { "jun", 6 },
    663         { "jun.", 6 },
    664         { "june", 6 }
    665     };
    666     checkIterator(iter, data, LENGTHOF(data));
    667     // Reset, and we should get the same result.
    668     logln("after iter.reset()");
    669     checkIterator(iter.reset(), data, LENGTHOF(data));
    670 }
    671 
    672 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    673     static const StringAndValue data[]={
    674         { "abcdef", 10 },
    675         { "abcdepq", 200 },
    676         { "abcdeyz", 3000 }
    677     };
    678     LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    679     if(trie.isNull()) {
    680         return;  // buildTrie() reported an error
    681     }
    682     // Go into a linear-match node.
    683     trie->next(u_a);
    684     trie->next(u_b);
    685     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    686     // Truncate within the linear-match node.
    687     UCharsTrie::Iterator iter(*trie, 2, errorCode);
    688     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    689         return;
    690     }
    691     static const StringAndValue expected[]={
    692         { "cd", -1 }
    693     };
    694     checkIterator(iter, expected, LENGTHOF(expected));
    695     // Reset, and we should get the same result.
    696     logln("after iter.reset()");
    697     checkIterator(iter.reset(), expected, LENGTHOF(expected));
    698 }
    699 
    700 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    701     static const StringAndValue data[]={
    702         { "abcdef", 10 },
    703         { "abcdepq", 200 },
    704         { "abcdeyz", 3000 }
    705     };
    706     LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    707     if(trie.isNull()) {
    708         return;  // buildTrie() reported an error
    709     }
    710     // Go into a linear-match node.
    711     trie->next(u_a);
    712     trie->next(u_b);
    713     trie->next(u_c);
    714     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    715     // Truncate after the linear-match node.
    716     UCharsTrie::Iterator iter(*trie, 3, errorCode);
    717     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    718         return;
    719     }
    720     static const StringAndValue expected[]={
    721         { "def", 10 },
    722         { "dep", -1 },
    723         { "dey", -1 }
    724     };
    725     checkIterator(iter, expected, LENGTHOF(expected));
    726     // Reset, and we should get the same result.
    727     logln("after iter.reset()");
    728     checkIterator(iter.reset(), expected, LENGTHOF(expected));
    729 }
    730 
    731 void UCharsTrieTest::TestIteratorFromUChars() {
    732     static const StringAndValue data[]={
    733         { "mm", 3 },
    734         { "mmm", 33 },
    735         { "mmnop", 333 }
    736     };
    737     builder_->clear();
    738     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
    739     for(int32_t i=0; i<LENGTHOF(data); ++i) {
    740         builder_->add(data[i].s, data[i].value, errorCode);
    741     }
    742     UnicodeString trieUChars;
    743     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    744     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
    745     checkIterator(iter, data, LENGTHOF(data));
    746 }
    747 
    748 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    749     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    750     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    751     logln("checkData(dataLength=%d, small)", (int)dataLength);
    752     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    753 }
    754 
    755 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    756     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
    757     if(trie.isNull()) {
    758         return;  // buildTrie() reported an error
    759     }
    760     checkFirst(*trie, data, dataLength);
    761     checkNext(*trie, data, dataLength);
    762     checkNextWithState(*trie, data, dataLength);
    763     checkNextString(*trie, data, dataLength);
    764     checkIterator(*trie, data, dataLength);
    765 }
    766 
    767 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    768                                       UStringTrieBuildOption buildOption) {
    769     IcuTestErrorCode errorCode(*this, "buildTrie()");
    770     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    771     int32_t index, step;
    772     if(dataLength&1) {
    773         // Odd number of items.
    774         index=dataLength/2;
    775         step=2;
    776     } else if((dataLength%3)!=0) {
    777         // Not a multiple of 3.
    778         index=dataLength/5;
    779         step=3;
    780     } else {
    781         index=dataLength-1;
    782         step=-1;
    783     }
    784     builder_->clear();
    785     for(int32_t i=0; i<dataLength; ++i) {
    786         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
    787                       data[index].value, errorCode);
    788         index=(index+step)%dataLength;
    789     }
    790     UnicodeString trieUChars;
    791     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
    792     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
    793     if(!errorCode.logIfFailureAndReset("add()/build()")) {
    794         builder_->add("zzz", 999, errorCode);
    795         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    796             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    797         }
    798     }
    799     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    800     UnicodeString trieUChars2;
    801     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
    802     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
    803         errln("builder.buildUnicodeString() before & after build() returned same array");
    804     }
    805     if(errorCode.isFailure()) {
    806         return NULL;
    807     }
    808     // Tries from either build() method should be identical but
    809     // UCharsTrie does not implement equals().
    810     // We just return either one.
    811     if((dataLength&1)!=0) {
    812         return trie.orphan();
    813     } else {
    814         return new UCharsTrie(trieUChars2.getBuffer());
    815     }
    816 }
    817 
    818 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
    819                                 const StringAndValue data[], int32_t dataLength) {
    820     for(int32_t i=0; i<dataLength; ++i) {
    821         if(*data[i].s==0) {
    822             continue;  // skip empty string
    823         }
    824         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    825         UChar32 c=expectedString[0];
    826         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
    827         UStringTrieResult firstResult=trie.first(c);
    828         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    829         UStringTrieResult nextResult=trie.next(nextCp);
    830         if(firstResult!=trie.reset().next(c) ||
    831            firstResult!=trie.current() ||
    832            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    833            nextResult!=trie.next(nextCp)
    834         ) {
    835             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
    836                   c, data[i].s);
    837         }
    838         c=expectedString.char32At(0);
    839         int32_t cLength=U16_LENGTH(c);
    840         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
    841         firstResult=trie.firstForCodePoint(c);
    842         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    843         nextResult=trie.nextForCodePoint(nextCp);
    844         if(firstResult!=trie.reset().nextForCodePoint(c) ||
    845            firstResult!=trie.current() ||
    846            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    847            nextResult!=trie.nextForCodePoint(nextCp)
    848         ) {
    849             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
    850                   c, data[i].s);
    851         }
    852     }
    853     trie.reset();
    854 }
    855 
    856 void UCharsTrieTest::checkNext(UCharsTrie &trie,
    857                                const StringAndValue data[], int32_t dataLength) {
    858     UCharsTrie::State state;
    859     for(int32_t i=0; i<dataLength; ++i) {
    860         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    861         int32_t stringLength= (i&1) ? -1 : expectedString.length();
    862         UStringTrieResult result;
    863         if( !USTRINGTRIE_HAS_VALUE(
    864                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
    865             result!=trie.current()
    866         ) {
    867             errln("trie does not seem to contain %s", data[i].s);
    868         } else if(trie.getValue()!=data[i].value) {
    869             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    870                   data[i].s,
    871                   (long)trie.getValue(), (long)trie.getValue(),
    872                   (long)data[i].value, (long)data[i].value);
    873         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    874             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    875         }
    876         trie.reset();
    877         stringLength=expectedString.length();
    878         result=trie.current();
    879         for(int32_t j=0; j<stringLength; ++j) {
    880             if(!USTRINGTRIE_HAS_NEXT(result)) {
    881                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    882                 break;
    883             }
    884             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    885                 trie.getValue();
    886                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    887                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    888                     break;
    889                 }
    890             }
    891             result=trie.next(expectedString[j]);
    892             if(!USTRINGTRIE_MATCHES(result)) {
    893                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    894                 break;
    895             }
    896             if(result!=trie.current()) {
    897                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    898                 break;
    899             }
    900         }
    901         if(!USTRINGTRIE_HAS_VALUE(result)) {
    902             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    903             continue;
    904         }
    905         trie.getValue();
    906         if(result!=trie.current()) {
    907             errln("trie.current() != current()+getValue()+current() after end of %s",
    908                   data[i].s);
    909         }
    910         // Compare the final current() with whether next() can actually continue.
    911         trie.saveState(state);
    912         UBool nextContinues=FALSE;
    913         for(int32_t c=0x20; c<0xe000; ++c) {
    914             if(c==0x80) {
    915                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
    916             }
    917             if(trie.resetToState(state).next(c)) {
    918                 nextContinues=TRUE;
    919                 break;
    920             }
    921         }
    922         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    923             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    924                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    925         }
    926         trie.reset();
    927     }
    928 }
    929 
    930 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
    931                                         const StringAndValue data[], int32_t dataLength) {
    932     UCharsTrie::State noState, state;
    933     for(int32_t i=0; i<dataLength; ++i) {
    934         if((i&1)==0) {
    935             // This should have no effect.
    936             trie.resetToState(noState);
    937         }
    938         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    939         int32_t stringLength=expectedString.length();
    940         int32_t partialLength=stringLength/3;
    941         for(int32_t j=0; j<partialLength; ++j) {
    942             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    943                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    944                 return;
    945             }
    946         }
    947         trie.saveState(state);
    948         UStringTrieResult resultAtState=trie.current();
    949         UStringTrieResult result;
    950         int32_t valueAtState=-99;
    951         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    952             valueAtState=trie.getValue();
    953         }
    954         result=trie.next(0);  // mismatch
    955         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    956             errln("trie.next(0) matched after part of %s", data[i].s);
    957         }
    958         if( resultAtState!=trie.resetToState(state).current() ||
    959             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    960         ) {
    961             errln("trie.next(part of %s) changes current()/getValue() after "
    962                   "saveState/next(0)/resetToState",
    963                   data[i].s);
    964         } else if(!USTRINGTRIE_HAS_VALUE(
    965                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
    966                                        stringLength-partialLength)) ||
    967                   result!=trie.current()) {
    968             errln("trie.next(rest of %s) does not seem to contain %s after "
    969                   "saveState/next(0)/resetToState",
    970                   data[i].s, data[i].s);
    971         } else if(!USTRINGTRIE_HAS_VALUE(
    972                       result=trie.resetToState(state).
    973                                   next(expectedString.getTerminatedBuffer()+partialLength,
    974                                        stringLength-partialLength)) ||
    975                   result!=trie.current()) {
    976             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    977                   data[i].s);
    978         } else if(trie.getValue()!=data[i].value) {
    979             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    980                   data[i].s,
    981                   (long)trie.getValue(), (long)trie.getValue(),
    982                   (long)data[i].value, (long)data[i].value);
    983         }
    984         trie.reset();
    985     }
    986 }
    987 
    988 // next(string) is also tested in other functions,
    989 // but here we try to go partway through the string, and then beyond it.
    990 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
    991                                      const StringAndValue data[], int32_t dataLength) {
    992     for(int32_t i=0; i<dataLength; ++i) {
    993         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    994         int32_t stringLength=expectedString.length();
    995         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
    996             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    997             continue;
    998         }
    999         // Test that we stop properly at the end of the string.
   1000         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
   1001                      stringLength+1-stringLength/2)) {
   1002             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
   1003         }
   1004         trie.reset();
   1005     }
   1006 }
   1007 
   1008 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
   1009                                    const StringAndValue data[], int32_t dataLength) {
   1010     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1011     UCharsTrie::Iterator iter(trie, 0, errorCode);
   1012     if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
   1013         return;
   1014     }
   1015     checkIterator(iter, data, dataLength);
   1016 }
   1017 
   1018 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
   1019                                    const StringAndValue data[], int32_t dataLength) {
   1020     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1021     for(int32_t i=0; i<dataLength; ++i) {
   1022         if(!iter.hasNext()) {
   1023             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
   1024             break;
   1025         }
   1026         UBool hasNext=iter.next(errorCode);
   1027         if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
   1028             break;
   1029         }
   1030         if(!hasNext) {
   1031             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
   1032             break;
   1033         }
   1034         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
   1035         if(iter.getString()!=expectedString) {
   1036             char buffer[1000];
   1037             UnicodeString invString(prettify(iter.getString()));
   1038             invString.extract(0, invString.length(), buffer, LENGTHOF(buffer), US_INV);
   1039             errln("trie iterator next().getString()=%s but expected %s for item %d",
   1040                   buffer, data[i].s, (int)i);
   1041         }
   1042         if(iter.getValue()!=data[i].value) {
   1043             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
   1044                   (long)iter.getValue(), (long)iter.getValue(),
   1045                   (long)data[i].value, (long)data[i].value,
   1046                   (int)i, data[i].s);
   1047         }
   1048     }
   1049     if(iter.hasNext()) {
   1050         errln("trie iterator hasNext()=TRUE after all items");
   1051     }
   1052     UBool hasNext=iter.next(errorCode);
   1053     errorCode.logIfFailureAndReset("trie iterator next() after all items");
   1054     if(hasNext) {
   1055         errln("trie iterator next()=TRUE after all items");
   1056     }
   1057 }
   1058