Home | History | Annotate | Download | only in intltest
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  ucharstrietest.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010nov16
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include <string.h>
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/appendable.h"
     21 #include "unicode/localpointer.h"
     22 #include "unicode/ucharstrie.h"
     23 #include "unicode/ucharstriebuilder.h"
     24 #include "unicode/uniset.h"
     25 #include "unicode/unistr.h"
     26 #include "unicode/utf16.h"
     27 #include "intltest.h"
     28 #include "cmemory.h"
     29 
     30 struct StringAndValue {
     31     const char *s;
     32     int32_t value;
     33 };
     34 
     35 class UCharsTrieTest : public IntlTest {
     36 public:
     37     UCharsTrieTest();
     38     virtual ~UCharsTrieTest();
     39 
     40     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
     41     void TestBuilder();
     42     void TestEmpty();
     43     void Test_a();
     44     void Test_a_ab();
     45     void TestShortestBranch();
     46     void TestBranches();
     47     void TestLongSequence();
     48     void TestLongBranch();
     49     void TestValuesForState();
     50     void TestCompact();
     51     void TestFirstForCodePoint();
     52     void TestNextForCodePoint();
     53 
     54     UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
     55     void TestLargeTrie();
     56 
     57     UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
     58     void TestHasUniqueValue();
     59     void TestGetNextUChars();
     60     void TestIteratorFromBranch();
     61     void TestIteratorFromLinearMatch();
     62     void TestTruncatingIteratorFromRoot();
     63     void TestTruncatingIteratorFromLinearMatchShort();
     64     void TestTruncatingIteratorFromLinearMatchLong();
     65     void TestIteratorFromUChars();
     66 
     67     void checkData(const StringAndValue data[], int32_t dataLength);
     68     void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
     69     UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
     70                           UStringTrieBuildOption buildOption);
     71     void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     72     void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     73     void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     74     void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     75     void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
     76     void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
     77 
     78 private:
     79     UCharsTrieBuilder *builder_;
     80 };
     81 
     82 extern IntlTest *createUCharsTrieTest() {
     83     return new UCharsTrieTest();
     84 }
     85 
     86 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
     87     IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
     88     builder_=new UCharsTrieBuilder(errorCode);
     89 }
     90 
     91 UCharsTrieTest::~UCharsTrieTest() {
     92     delete builder_;
     93 }
     94 
     95 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
     96     if(exec) {
     97         logln("TestSuite UCharsTrieTest: ");
     98     }
     99     TESTCASE_AUTO_BEGIN;
    100     TESTCASE_AUTO(TestBuilder);
    101     TESTCASE_AUTO(TestEmpty);
    102     TESTCASE_AUTO(Test_a);
    103     TESTCASE_AUTO(Test_a_ab);
    104     TESTCASE_AUTO(TestShortestBranch);
    105     TESTCASE_AUTO(TestBranches);
    106     TESTCASE_AUTO(TestLongSequence);
    107     TESTCASE_AUTO(TestLongBranch);
    108     TESTCASE_AUTO(TestValuesForState);
    109     TESTCASE_AUTO(TestCompact);
    110     TESTCASE_AUTO(TestFirstForCodePoint);
    111     TESTCASE_AUTO(TestNextForCodePoint);
    112     TESTCASE_AUTO(TestLargeTrie);
    113     TESTCASE_AUTO(TestHasUniqueValue);
    114     TESTCASE_AUTO(TestGetNextUChars);
    115     TESTCASE_AUTO(TestIteratorFromBranch);
    116     TESTCASE_AUTO(TestIteratorFromLinearMatch);
    117     TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    118     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    119     TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    120     TESTCASE_AUTO(TestIteratorFromUChars);
    121     TESTCASE_AUTO_END;
    122 }
    123 
    124 void UCharsTrieTest::TestBuilder() {
    125     IcuTestErrorCode errorCode(*this, "TestBuilder()");
    126     delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
    127     if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
    128         errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
    129         return;
    130     }
    131     // TODO: remove .build(...) once add() checks for duplicates.
    132     builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
    133     if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
    134         errln("UCharsTrieBuilder.add() did not detect duplicates");
    135         return;
    136     }
    137 }
    138 
    139 void UCharsTrieTest::TestEmpty() {
    140     static const StringAndValue data[]={
    141         { "", 0 }
    142     };
    143     checkData(data, UPRV_LENGTHOF(data));
    144 }
    145 
    146 void UCharsTrieTest::Test_a() {
    147     static const StringAndValue data[]={
    148         { "a", 1 }
    149     };
    150     checkData(data, UPRV_LENGTHOF(data));
    151 }
    152 
    153 void UCharsTrieTest::Test_a_ab() {
    154     static const StringAndValue data[]={
    155         { "a", 1 },
    156         { "ab", 100 }
    157     };
    158     checkData(data, UPRV_LENGTHOF(data));
    159 }
    160 
    161 void UCharsTrieTest::TestShortestBranch() {
    162     static const StringAndValue data[]={
    163         { "a", 1000 },
    164         { "b", 2000 }
    165     };
    166     checkData(data, UPRV_LENGTHOF(data));
    167 }
    168 
    169 void UCharsTrieTest::TestBranches() {
    170     static const StringAndValue data[]={
    171         { "a", 0x10 },
    172         { "cc", 0x40 },
    173         { "e", 0x100 },
    174         { "ggg", 0x400 },
    175         { "i", 0x1000 },
    176         { "kkkk", 0x4000 },
    177         { "n", 0x10000 },
    178         { "ppppp", 0x40000 },
    179         { "r", 0x100000 },
    180         { "sss", 0x200000 },
    181         { "t", 0x400000 },
    182         { "uu", 0x800000 },
    183         { "vv", 0x7fffffff },
    184         { "zz", (int32_t)0x80000000 }
    185     };
    186     for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
    187         logln("TestBranches length=%d", (int)length);
    188         checkData(data, length);
    189     }
    190 }
    191 
    192 void UCharsTrieTest::TestLongSequence() {
    193     static const StringAndValue data[]={
    194         { "a", -1 },
    195         // sequence of linear-match nodes
    196         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
    197         // more than 256 units
    198         { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    199           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    200           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    201           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    202           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    203           "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
    204     };
    205     checkData(data, UPRV_LENGTHOF(data));
    206 }
    207 
    208 void UCharsTrieTest::TestLongBranch() {
    209     // Split-branch and interesting compact-integer values.
    210     static const StringAndValue data[]={
    211         { "a", -2 },
    212         { "b", -1 },
    213         { "c", 0 },
    214         { "d2", 1 },
    215         { "f", 0x3f },
    216         { "g", 0x40 },
    217         { "h", 0x41 },
    218         { "j23", 0x1900 },
    219         { "j24", 0x19ff },
    220         { "j25", 0x1a00 },
    221         { "k2", 0x1a80 },
    222         { "k3", 0x1aff },
    223         { "l234567890", 0x1b00 },
    224         { "l234567890123", 0x1b01 },
    225         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
    226         { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
    227         { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
    228         { "r", 0x333333 },
    229         { "s2345", 0x4444444 },
    230         { "t234567890", 0x77777777 },
    231         { "z", (int32_t)0x80000001 }
    232     };
    233     checkData(data, UPRV_LENGTHOF(data));
    234 }
    235 
    236 void UCharsTrieTest::TestValuesForState() {
    237     // Check that saveState() and resetToState() interact properly
    238     // with next() and current().
    239     static const StringAndValue data[]={
    240         { "a", -1 },
    241         { "ab", -2 },
    242         { "abc", -3 },
    243         { "abcd", -4 },
    244         { "abcde", -5 },
    245         { "abcdef", -6 }
    246     };
    247     checkData(data, UPRV_LENGTHOF(data));
    248 }
    249 
    250 void UCharsTrieTest::TestCompact() {
    251     // Duplicate trailing strings and values provide opportunities for compacting.
    252     static const StringAndValue data[]={
    253         { "+", 0 },
    254         { "+august", 8 },
    255         { "+december", 12 },
    256         { "+july", 7 },
    257         { "+june", 6 },
    258         { "+november", 11 },
    259         { "+october", 10 },
    260         { "+september", 9 },
    261         { "-", 0 },
    262         { "-august", 8 },
    263         { "-december", 12 },
    264         { "-july", 7 },
    265         { "-june", 6 },
    266         { "-november", 11 },
    267         { "-october", 10 },
    268         { "-september", 9 },
    269         // The l+n branch (with its sub-nodes) is a duplicate but will be written
    270         // both times because each time it follows a different linear-match node.
    271         { "xjuly", 7 },
    272         { "xjune", 6 }
    273     };
    274     checkData(data, UPRV_LENGTHOF(data));
    275 }
    276 
    277 void UCharsTrieTest::TestFirstForCodePoint() {
    278     static const StringAndValue data[]={
    279         { "a", 1 },
    280         { "a\\ud800", 2 },
    281         { "a\\U00010000", 3 },
    282         { "\\ud840", 4 },
    283         { "\\U00020000\\udbff", 5 },
    284         { "\\U00020000\\U0010ffff", 6 },
    285         { "\\U00020000\\U0010ffffz", 7 },
    286         { "\\U00050000xy", 8 },
    287         { "\\U00050000xyz", 9 }
    288     };
    289     checkData(data, UPRV_LENGTHOF(data));
    290 }
    291 
    292 void UCharsTrieTest::TestNextForCodePoint() {
    293     static const StringAndValue data[]={
    294         { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
    295         { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
    296         { "\\u4dff\\U000103ff", 99999 }
    297     };
    298     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    299     if(trie.isNull()) {
    300         return;  // buildTrie() reported an error
    301     }
    302     UStringTrieResult result;
    303     if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    304         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    305         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    306         (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    307         (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    308         (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    309         trie->getValue()!=2000000000
    310     ) {
    311         errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
    312     }
    313     if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    314         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    315         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    316         (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    317         trie->getValue()!=44444
    318     ) {
    319         errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
    320     }
    321     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    322         (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    323         (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    324         (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
    325     ) {
    326         errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
    327     }
    328     if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
    329         (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
    330         trie->getValue()!=99999
    331     ) {
    332         errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
    333     }
    334 }
    335 
    336 // Definitions in the anonymous namespace are invisible outside this file.
    337 namespace {
    338 
    339 // Generate (string, value) pairs.
    340 // The first string (before next()) will be empty.
    341 class Generator {
    342 public:
    343     Generator() : value(4711), num(0) {}
    344     void next() {
    345         UChar c;
    346         s.truncate(0);
    347         s.append(c=(UChar)(value>>16));
    348         s.append((UChar)(value>>4));
    349         if(value&1) {
    350             s.append((UChar)value);
    351         }
    352         set.add(c);
    353         value+=((value>>5)&0x7ff)*3+1;
    354         ++num;
    355     }
    356     const UnicodeString &getString() const { return s; }
    357     int32_t getValue() const { return value; }
    358     int32_t countUniqueFirstChars() const { return set.size(); }
    359     int32_t getIndex() const { return num; }
    360 
    361 private:
    362     UnicodeString s;
    363     UnicodeSet set;
    364     int32_t value;
    365     int32_t num;
    366 };
    367 
    368 }  // end namespace
    369 
    370 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
    371     IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
    372     Generator gen;
    373     builder_->clear();
    374     while(gen.countUniqueFirstChars()<numUniqueFirst) {
    375         builder_->add(gen.getString(), gen.getValue(), errorCode);
    376         gen.next();
    377     }
    378     logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
    379     UnicodeString trieUChars;
    380     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    381     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    382     return new UCharsTrie(trieUChars.getBuffer());
    383 }
    384 
    385 // Exercise a large branch node.
    386 void UCharsTrieTest::TestLargeTrie() {
    387     LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
    388     if(trie.isNull()) {
    389         return;  // buildTrie() reported an error
    390     }
    391     Generator gen;
    392     while(gen.countUniqueFirstChars()<1111) {
    393         UnicodeString x(gen.getString());
    394         int32_t value=gen.getValue();
    395         if(!x.isEmpty()) {
    396             if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
    397                 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
    398                       x[0], (long)gen.getIndex());
    399                 break;
    400             }
    401             x.remove(0, 1);
    402         }
    403         UStringTrieResult result=trie->next(x.getBuffer(), x.length());
    404         if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
    405             errln("next(%d chars U+%04X U+%04X)!=hasValue or "
    406                   "next()!=current() or getValue() wrong "
    407                   "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
    408             break;
    409         }
    410         gen.next();
    411     }
    412 }
    413 
    414 enum {
    415     u_a=0x61,
    416     u_b=0x62,
    417     u_c=0x63,
    418     u_j=0x6a,
    419     u_n=0x6e,
    420     u_r=0x72,
    421     u_u=0x75,
    422     u_y=0x79
    423 };
    424 
    425 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
    426     // All types of nodes leading to the same value,
    427     // for code coverage of recursive functions.
    428     // In particular, we need a lot of branches on some single level
    429     // to exercise a split-branch node.
    430     static const StringAndValue data[]={
    431         { "august", 8 },
    432         { "jan", 1 },
    433         { "jan.", 1 },
    434         { "jana", 1 },
    435         { "janbb", 1 },
    436         { "janc", 1 },
    437         { "janddd", 1 },
    438         { "janee", 1 },
    439         { "janef", 1 },
    440         { "janf", 1 },
    441         { "jangg", 1 },
    442         { "janh", 1 },
    443         { "janiiii", 1 },
    444         { "janj", 1 },
    445         { "jankk", 1 },
    446         { "jankl", 1 },
    447         { "jankmm", 1 },
    448         { "janl", 1 },
    449         { "janm", 1 },
    450         { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    451         { "jano", 1 },
    452         { "janpp", 1 },
    453         { "janqqq", 1 },
    454         { "janr", 1 },
    455         { "januar", 1 },
    456         { "january", 1 },
    457         { "july", 7 },
    458         { "jun", 6 },
    459         { "jun.", 6 },
    460         { "june", 6 }
    461     };
    462     return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
    463 }
    464 
    465 void UCharsTrieTest::TestHasUniqueValue() {
    466     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    467     if(trie.isNull()) {
    468         return;  // buildTrie() reported an error
    469     }
    470     int32_t uniqueValue;
    471     if(trie->hasUniqueValue(uniqueValue)) {
    472         errln("unique value at root");
    473     }
    474     trie->next(u_j);
    475     trie->next(u_a);
    476     trie->next(u_n);
    477     // hasUniqueValue() directly after next()
    478     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
    479         errln("not unique value 1 after \"jan\"");
    480     }
    481     trie->first(u_j);
    482     trie->next(u_u);
    483     if(trie->hasUniqueValue(uniqueValue)) {
    484         errln("unique value after \"ju\"");
    485     }
    486     if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
    487         errln("not normal value 6 after \"jun\"");
    488     }
    489     // hasUniqueValue() after getValue()
    490     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
    491         errln("not unique value 6 after \"jun\"");
    492     }
    493     // hasUniqueValue() from within a linear-match node
    494     trie->first(u_a);
    495     trie->next(u_u);
    496     if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
    497         errln("not unique value 8 after \"au\"");
    498     }
    499 }
    500 
    501 void UCharsTrieTest::TestGetNextUChars() {
    502     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    503     if(trie.isNull()) {
    504         return;  // buildTrie() reported an error
    505     }
    506     UnicodeString buffer;
    507     UnicodeStringAppendable app(buffer);
    508     int32_t count=trie->getNextUChars(app);
    509     if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
    510         errln("months getNextUChars()!=[aj] at root");
    511     }
    512     trie->next(u_j);
    513     trie->next(u_a);
    514     trie->next(u_n);
    515     // getNextUChars() directly after next()
    516     buffer.remove();
    517     count=trie->getNextUChars(app);
    518     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    519         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
    520     }
    521     // getNextUChars() after getValue()
    522     trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
    523     buffer.remove();
    524     count=trie->getNextUChars(app);
    525     if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
    526         errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
    527     }
    528     // getNextUChars() from a linear-match node
    529     trie->next(u_u);
    530     buffer.remove();
    531     count=trie->getNextUChars(app);
    532     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
    533         errln("months getNextUChars()!=[a] after \"janu\"");
    534     }
    535     trie->next(u_a);
    536     buffer.remove();
    537     count=trie->getNextUChars(app);
    538     if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
    539         errln("months getNextUChars()!=[r] after \"janua\"");
    540     }
    541     trie->next(u_r);
    542     trie->next(u_y);
    543     // getNextUChars() after a final match
    544     buffer.remove();
    545     count=trie->getNextUChars(app);
    546     if(count!=0 || buffer.length()!=0) {
    547         errln("months getNextUChars()!=[] after \"january\"");
    548     }
    549 }
    550 
    551 void UCharsTrieTest::TestIteratorFromBranch() {
    552     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    553     if(trie.isNull()) {
    554         return;  // buildTrie() reported an error
    555     }
    556     // Go to a branch node.
    557     trie->next(u_j);
    558     trie->next(u_a);
    559     trie->next(u_n);
    560     IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
    561     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    562     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    563         return;
    564     }
    565     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    566     // following "jan".
    567     static const StringAndValue data[]={
    568         { "", 1 },
    569         { ".", 1 },
    570         { "a", 1 },
    571         { "bb", 1 },
    572         { "c", 1 },
    573         { "ddd", 1 },
    574         { "ee", 1 },
    575         { "ef", 1 },
    576         { "f", 1 },
    577         { "gg", 1 },
    578         { "h", 1 },
    579         { "iiii", 1 },
    580         { "j", 1 },
    581         { "kk", 1 },
    582         { "kl", 1 },
    583         { "kmm", 1 },
    584         { "l", 1 },
    585         { "m", 1 },
    586         { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
    587         { "o", 1 },
    588         { "pp", 1 },
    589         { "qqq", 1 },
    590         { "r", 1 },
    591         { "uar", 1 },
    592         { "uary", 1 }
    593     };
    594     checkIterator(iter, data, UPRV_LENGTHOF(data));
    595     // Reset, and we should get the same result.
    596     logln("after iter.reset()");
    597     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    598 }
    599 
    600 void UCharsTrieTest::TestIteratorFromLinearMatch() {
    601     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
    602     if(trie.isNull()) {
    603         return;  // buildTrie() reported an error
    604     }
    605     // Go into a linear-match node.
    606     trie->next(u_j);
    607     trie->next(u_a);
    608     trie->next(u_n);
    609     trie->next(u_u);
    610     trie->next(u_a);
    611     IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
    612     UCharsTrie::Iterator iter(*trie, 0, errorCode);
    613     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    614         return;
    615     }
    616     // Expected data: Same as in buildMonthsTrie(), except only the suffixes
    617     // following "janua".
    618     static const StringAndValue data[]={
    619         { "r", 1 },
    620         { "ry", 1 }
    621     };
    622     checkIterator(iter, data, UPRV_LENGTHOF(data));
    623     // Reset, and we should get the same result.
    624     logln("after iter.reset()");
    625     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    626 }
    627 
    628 void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
    629     LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
    630     if(trie.isNull()) {
    631         return;  // buildTrie() reported an error
    632     }
    633     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
    634     UCharsTrie::Iterator iter(*trie, 4, errorCode);
    635     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    636         return;
    637     }
    638     // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
    639     // of each string, and no string duplicates from the truncation.
    640     static const StringAndValue data[]={
    641         { "augu", -1 },
    642         { "jan", 1 },
    643         { "jan.", 1 },
    644         { "jana", 1 },
    645         { "janb", -1 },
    646         { "janc", 1 },
    647         { "jand", -1 },
    648         { "jane", -1 },
    649         { "janf", 1 },
    650         { "jang", -1 },
    651         { "janh", 1 },
    652         { "jani", -1 },
    653         { "janj", 1 },
    654         { "jank", -1 },
    655         { "janl", 1 },
    656         { "janm", 1 },
    657         { "jann", -1 },
    658         { "jano", 1 },
    659         { "janp", -1 },
    660         { "janq", -1 },
    661         { "janr", 1 },
    662         { "janu", -1 },
    663         { "july", 7 },
    664         { "jun", 6 },
    665         { "jun.", 6 },
    666         { "june", 6 }
    667     };
    668     checkIterator(iter, data, UPRV_LENGTHOF(data));
    669     // Reset, and we should get the same result.
    670     logln("after iter.reset()");
    671     checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
    672 }
    673 
    674 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
    675     static const StringAndValue data[]={
    676         { "abcdef", 10 },
    677         { "abcdepq", 200 },
    678         { "abcdeyz", 3000 }
    679     };
    680     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    681     if(trie.isNull()) {
    682         return;  // buildTrie() reported an error
    683     }
    684     // Go into a linear-match node.
    685     trie->next(u_a);
    686     trie->next(u_b);
    687     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
    688     // Truncate within the linear-match node.
    689     UCharsTrie::Iterator iter(*trie, 2, errorCode);
    690     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    691         return;
    692     }
    693     static const StringAndValue expected[]={
    694         { "cd", -1 }
    695     };
    696     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    697     // Reset, and we should get the same result.
    698     logln("after iter.reset()");
    699     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    700 }
    701 
    702 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
    703     static const StringAndValue data[]={
    704         { "abcdef", 10 },
    705         { "abcdepq", 200 },
    706         { "abcdeyz", 3000 }
    707     };
    708     LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
    709     if(trie.isNull()) {
    710         return;  // buildTrie() reported an error
    711     }
    712     // Go into a linear-match node.
    713     trie->next(u_a);
    714     trie->next(u_b);
    715     trie->next(u_c);
    716     IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
    717     // Truncate after the linear-match node.
    718     UCharsTrie::Iterator iter(*trie, 3, errorCode);
    719     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
    720         return;
    721     }
    722     static const StringAndValue expected[]={
    723         { "def", 10 },
    724         { "dep", -1 },
    725         { "dey", -1 }
    726     };
    727     checkIterator(iter, expected, UPRV_LENGTHOF(expected));
    728     // Reset, and we should get the same result.
    729     logln("after iter.reset()");
    730     checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
    731 }
    732 
    733 void UCharsTrieTest::TestIteratorFromUChars() {
    734     static const StringAndValue data[]={
    735         { "mm", 3 },
    736         { "mmm", 33 },
    737         { "mmnop", 333 }
    738     };
    739     builder_->clear();
    740     IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
    741     for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
    742         builder_->add(data[i].s, data[i].value, errorCode);
    743     }
    744     UnicodeString trieUChars;
    745     builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
    746     UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
    747     checkIterator(iter, data, UPRV_LENGTHOF(data));
    748 }
    749 
    750 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
    751     logln("checkData(dataLength=%d, fast)", (int)dataLength);
    752     checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
    753     logln("checkData(dataLength=%d, small)", (int)dataLength);
    754     checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
    755 }
    756 
    757 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
    758     LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
    759     if(trie.isNull()) {
    760         return;  // buildTrie() reported an error
    761     }
    762     checkFirst(*trie, data, dataLength);
    763     checkNext(*trie, data, dataLength);
    764     checkNextWithState(*trie, data, dataLength);
    765     checkNextString(*trie, data, dataLength);
    766     checkIterator(*trie, data, dataLength);
    767 }
    768 
    769 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
    770                                       UStringTrieBuildOption buildOption) {
    771     IcuTestErrorCode errorCode(*this, "buildTrie()");
    772     // Add the items to the trie builder in an interesting (not trivial, not random) order.
    773     int32_t index, step;
    774     if(dataLength&1) {
    775         // Odd number of items.
    776         index=dataLength/2;
    777         step=2;
    778     } else if((dataLength%3)!=0) {
    779         // Not a multiple of 3.
    780         index=dataLength/5;
    781         step=3;
    782     } else {
    783         index=dataLength-1;
    784         step=-1;
    785     }
    786     builder_->clear();
    787     for(int32_t i=0; i<dataLength; ++i) {
    788         builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
    789                       data[index].value, errorCode);
    790         index=(index+step)%dataLength;
    791     }
    792     UnicodeString trieUChars;
    793     builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
    794     LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
    795     if(!errorCode.errIfFailureAndReset("add()/build()")) {
    796         builder_->add("zzz", 999, errorCode);
    797         if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
    798             errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
    799         }
    800     }
    801     logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
    802     UnicodeString trieUChars2;
    803     builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
    804     if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
    805         errln("builder.buildUnicodeString() before & after build() returned same array");
    806     }
    807     if(errorCode.isFailure()) {
    808         return NULL;
    809     }
    810     // Tries from either build() method should be identical but
    811     // UCharsTrie does not implement equals().
    812     // We just return either one.
    813     if((dataLength&1)!=0) {
    814         return trie.orphan();
    815     } else {
    816         return new UCharsTrie(trieUChars2.getBuffer());
    817     }
    818 }
    819 
    820 void UCharsTrieTest::checkFirst(UCharsTrie &trie,
    821                                 const StringAndValue data[], int32_t dataLength) {
    822     for(int32_t i=0; i<dataLength; ++i) {
    823         if(*data[i].s==0) {
    824             continue;  // skip empty string
    825         }
    826         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    827         UChar32 c=expectedString[0];
    828         UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
    829         UStringTrieResult firstResult=trie.first(c);
    830         int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    831         UStringTrieResult nextResult=trie.next(nextCp);
    832         if(firstResult!=trie.reset().next(c) ||
    833            firstResult!=trie.current() ||
    834            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    835            nextResult!=trie.next(nextCp)
    836         ) {
    837             errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
    838                   c, data[i].s);
    839         }
    840         c=expectedString.char32At(0);
    841         int32_t cLength=U16_LENGTH(c);
    842         nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
    843         firstResult=trie.firstForCodePoint(c);
    844         firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
    845         nextResult=trie.nextForCodePoint(nextCp);
    846         if(firstResult!=trie.reset().nextForCodePoint(c) ||
    847            firstResult!=trie.current() ||
    848            firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
    849            nextResult!=trie.nextForCodePoint(nextCp)
    850         ) {
    851             errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
    852                   c, data[i].s);
    853         }
    854     }
    855     trie.reset();
    856 }
    857 
    858 void UCharsTrieTest::checkNext(UCharsTrie &trie,
    859                                const StringAndValue data[], int32_t dataLength) {
    860     UCharsTrie::State state;
    861     for(int32_t i=0; i<dataLength; ++i) {
    862         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    863         int32_t stringLength= (i&1) ? -1 : expectedString.length();
    864         UStringTrieResult result;
    865         if( !USTRINGTRIE_HAS_VALUE(
    866                 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
    867             result!=trie.current()
    868         ) {
    869             errln("trie does not seem to contain %s", data[i].s);
    870         } else if(trie.getValue()!=data[i].value) {
    871             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    872                   data[i].s,
    873                   (long)trie.getValue(), (long)trie.getValue(),
    874                   (long)data[i].value, (long)data[i].value);
    875         } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
    876             errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
    877         }
    878         trie.reset();
    879         stringLength=expectedString.length();
    880         result=trie.current();
    881         for(int32_t j=0; j<stringLength; ++j) {
    882             if(!USTRINGTRIE_HAS_NEXT(result)) {
    883                 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
    884                 break;
    885             }
    886             if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
    887                 trie.getValue();
    888                 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
    889                     errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
    890                     break;
    891                 }
    892             }
    893             result=trie.next(expectedString[j]);
    894             if(!USTRINGTRIE_MATCHES(result)) {
    895                 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
    896                 break;
    897             }
    898             if(result!=trie.current()) {
    899                 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
    900                 break;
    901             }
    902         }
    903         if(!USTRINGTRIE_HAS_VALUE(result)) {
    904             errln("trie.next()!=hasValue at the end of %s", data[i].s);
    905             continue;
    906         }
    907         trie.getValue();
    908         if(result!=trie.current()) {
    909             errln("trie.current() != current()+getValue()+current() after end of %s",
    910                   data[i].s);
    911         }
    912         // Compare the final current() with whether next() can actually continue.
    913         trie.saveState(state);
    914         UBool nextContinues=FALSE;
    915         for(int32_t c=0x20; c<0xe000; ++c) {
    916             if(c==0x80) {
    917                 c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
    918             }
    919             if(trie.resetToState(state).next(c)) {
    920                 nextContinues=TRUE;
    921                 break;
    922             }
    923         }
    924         if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
    925             errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
    926                   "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
    927         }
    928         trie.reset();
    929     }
    930 }
    931 
    932 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
    933                                         const StringAndValue data[], int32_t dataLength) {
    934     UCharsTrie::State noState, state;
    935     for(int32_t i=0; i<dataLength; ++i) {
    936         if((i&1)==0) {
    937             // This should have no effect.
    938             trie.resetToState(noState);
    939         }
    940         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    941         int32_t stringLength=expectedString.length();
    942         int32_t partialLength=stringLength/3;
    943         for(int32_t j=0; j<partialLength; ++j) {
    944             if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
    945                 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
    946                 return;
    947             }
    948         }
    949         trie.saveState(state);
    950         UStringTrieResult resultAtState=trie.current();
    951         UStringTrieResult result;
    952         int32_t valueAtState=-99;
    953         if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
    954             valueAtState=trie.getValue();
    955         }
    956         result=trie.next(0);  // mismatch
    957         if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
    958             errln("trie.next(0) matched after part of %s", data[i].s);
    959         }
    960         if( resultAtState!=trie.resetToState(state).current() ||
    961             (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
    962         ) {
    963             errln("trie.next(part of %s) changes current()/getValue() after "
    964                   "saveState/next(0)/resetToState",
    965                   data[i].s);
    966         } else if(!USTRINGTRIE_HAS_VALUE(
    967                       result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
    968                                        stringLength-partialLength)) ||
    969                   result!=trie.current()) {
    970             errln("trie.next(rest of %s) does not seem to contain %s after "
    971                   "saveState/next(0)/resetToState",
    972                   data[i].s, data[i].s);
    973         } else if(!USTRINGTRIE_HAS_VALUE(
    974                       result=trie.resetToState(state).
    975                                   next(expectedString.getTerminatedBuffer()+partialLength,
    976                                        stringLength-partialLength)) ||
    977                   result!=trie.current()) {
    978             errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
    979                   data[i].s);
    980         } else if(trie.getValue()!=data[i].value) {
    981             errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
    982                   data[i].s,
    983                   (long)trie.getValue(), (long)trie.getValue(),
    984                   (long)data[i].value, (long)data[i].value);
    985         }
    986         trie.reset();
    987     }
    988 }
    989 
    990 // next(string) is also tested in other functions,
    991 // but here we try to go partway through the string, and then beyond it.
    992 void UCharsTrieTest::checkNextString(UCharsTrie &trie,
    993                                      const StringAndValue data[], int32_t dataLength) {
    994     for(int32_t i=0; i<dataLength; ++i) {
    995         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
    996         int32_t stringLength=expectedString.length();
    997         if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
    998             errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
    999             continue;
   1000         }
   1001         // Test that we stop properly at the end of the string.
   1002         if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
   1003                      stringLength+1-stringLength/2)) {
   1004             errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
   1005         }
   1006         trie.reset();
   1007     }
   1008 }
   1009 
   1010 void UCharsTrieTest::checkIterator(UCharsTrie &trie,
   1011                                    const StringAndValue data[], int32_t dataLength) {
   1012     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1013     UCharsTrie::Iterator iter(trie, 0, errorCode);
   1014     if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
   1015         return;
   1016     }
   1017     checkIterator(iter, data, dataLength);
   1018 }
   1019 
   1020 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
   1021                                    const StringAndValue data[], int32_t dataLength) {
   1022     IcuTestErrorCode errorCode(*this, "checkIterator()");
   1023     for(int32_t i=0; i<dataLength; ++i) {
   1024         if(!iter.hasNext()) {
   1025             errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
   1026             break;
   1027         }
   1028         UBool hasNext=iter.next(errorCode);
   1029         if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
   1030             break;
   1031         }
   1032         if(!hasNext) {
   1033             errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
   1034             break;
   1035         }
   1036         UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
   1037         if(iter.getString()!=expectedString) {
   1038             char buffer[1000];
   1039             UnicodeString invString(prettify(iter.getString()));
   1040             invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
   1041             errln("trie iterator next().getString()=%s but expected %s for item %d",
   1042                   buffer, data[i].s, (int)i);
   1043         }
   1044         if(iter.getValue()!=data[i].value) {
   1045             errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
   1046                   (long)iter.getValue(), (long)iter.getValue(),
   1047                   (long)data[i].value, (long)data[i].value,
   1048                   (int)i, data[i].s);
   1049         }
   1050     }
   1051     if(iter.hasNext()) {
   1052         errln("trie iterator hasNext()=TRUE after all items");
   1053     }
   1054     UBool hasNext=iter.next(errorCode);
   1055     errorCode.errIfFailureAndReset("trie iterator next() after all items");
   1056     if(hasNext) {
   1057         errln("trie iterator next()=TRUE after all items");
   1058     }
   1059 }
   1060