1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: ucharstrietest.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010nov16 14 * created by: Markus W. Scherer 15 */ 16 17 #include <string.h> 18 19 #include "unicode/utypes.h" 20 #include "unicode/appendable.h" 21 #include "unicode/localpointer.h" 22 #include "unicode/ucharstrie.h" 23 #include "unicode/ucharstriebuilder.h" 24 #include "unicode/uniset.h" 25 #include "unicode/unistr.h" 26 #include "intltest.h" 27 #include "cmemory.h" 28 29 struct StringAndValue { 30 const char *s; 31 int32_t value; 32 }; 33 34 class UCharsTrieTest : public IntlTest { 35 public: 36 UCharsTrieTest(); 37 virtual ~UCharsTrieTest(); 38 39 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 40 void TestBuilder(); 41 void TestEmpty(); 42 void Test_a(); 43 void Test_a_ab(); 44 void TestShortestBranch(); 45 void TestBranches(); 46 void TestLongSequence(); 47 void TestLongBranch(); 48 void TestValuesForState(); 49 void TestCompact(); 50 void TestFirstForCodePoint(); 51 void TestNextForCodePoint(); 52 53 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); 54 void TestLargeTrie(); 55 56 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 57 void TestHasUniqueValue(); 58 void TestGetNextUChars(); 59 void TestIteratorFromBranch(); 60 void TestIteratorFromLinearMatch(); 61 void TestTruncatingIteratorFromRoot(); 62 void TestTruncatingIteratorFromLinearMatchShort(); 63 void TestTruncatingIteratorFromLinearMatchLong(); 64 void TestIteratorFromUChars(); 65 66 void checkData(const StringAndValue data[], int32_t dataLength); 67 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 68 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 69 UStringTrieBuildOption buildOption); 70 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 71 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 72 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 73 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 74 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 75 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 76 77 private: 78 UCharsTrieBuilder *builder_; 79 }; 80 81 extern IntlTest *createUCharsTrieTest() { 82 return new UCharsTrieTest(); 83 } 84 85 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { 86 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); 87 builder_=new UCharsTrieBuilder(errorCode); 88 } 89 90 UCharsTrieTest::~UCharsTrieTest() { 91 delete builder_; 92 } 93 94 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 95 if(exec) { 96 logln("TestSuite UCharsTrieTest: "); 97 } 98 TESTCASE_AUTO_BEGIN; 99 TESTCASE_AUTO(TestBuilder); 100 TESTCASE_AUTO(TestEmpty); 101 TESTCASE_AUTO(Test_a); 102 TESTCASE_AUTO(Test_a_ab); 103 TESTCASE_AUTO(TestShortestBranch); 104 TESTCASE_AUTO(TestBranches); 105 TESTCASE_AUTO(TestLongSequence); 106 TESTCASE_AUTO(TestLongBranch); 107 TESTCASE_AUTO(TestValuesForState); 108 TESTCASE_AUTO(TestCompact); 109 TESTCASE_AUTO(TestFirstForCodePoint); 110 TESTCASE_AUTO(TestNextForCodePoint); 111 TESTCASE_AUTO(TestLargeTrie); 112 TESTCASE_AUTO(TestHasUniqueValue); 113 TESTCASE_AUTO(TestGetNextUChars); 114 TESTCASE_AUTO(TestIteratorFromBranch); 115 TESTCASE_AUTO(TestIteratorFromLinearMatch); 116 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 117 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 118 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 119 TESTCASE_AUTO(TestIteratorFromUChars); 120 TESTCASE_AUTO_END; 121 } 122 123 void UCharsTrieTest::TestBuilder() { 124 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 125 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 126 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 127 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 128 return; 129 } 130 // TODO: remove .build(...) once add() checks for duplicates. 131 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 132 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 133 errln("UCharsTrieBuilder.add() did not detect duplicates"); 134 return; 135 } 136 } 137 138 void UCharsTrieTest::TestEmpty() { 139 static const StringAndValue data[]={ 140 { "", 0 } 141 }; 142 checkData(data, UPRV_LENGTHOF(data)); 143 } 144 145 void UCharsTrieTest::Test_a() { 146 static const StringAndValue data[]={ 147 { "a", 1 } 148 }; 149 checkData(data, UPRV_LENGTHOF(data)); 150 } 151 152 void UCharsTrieTest::Test_a_ab() { 153 static const StringAndValue data[]={ 154 { "a", 1 }, 155 { "ab", 100 } 156 }; 157 checkData(data, UPRV_LENGTHOF(data)); 158 } 159 160 void UCharsTrieTest::TestShortestBranch() { 161 static const StringAndValue data[]={ 162 { "a", 1000 }, 163 { "b", 2000 } 164 }; 165 checkData(data, UPRV_LENGTHOF(data)); 166 } 167 168 void UCharsTrieTest::TestBranches() { 169 static const StringAndValue data[]={ 170 { "a", 0x10 }, 171 { "cc", 0x40 }, 172 { "e", 0x100 }, 173 { "ggg", 0x400 }, 174 { "i", 0x1000 }, 175 { "kkkk", 0x4000 }, 176 { "n", 0x10000 }, 177 { "ppppp", 0x40000 }, 178 { "r", 0x100000 }, 179 { "sss", 0x200000 }, 180 { "t", 0x400000 }, 181 { "uu", 0x800000 }, 182 { "vv", 0x7fffffff }, 183 { "zz", (int32_t)0x80000000 } 184 }; 185 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { 186 logln("TestBranches length=%d", (int)length); 187 checkData(data, length); 188 } 189 } 190 191 void UCharsTrieTest::TestLongSequence() { 192 static const StringAndValue data[]={ 193 { "a", -1 }, 194 // sequence of linear-match nodes 195 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 196 // more than 256 units 197 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 202 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 203 }; 204 checkData(data, UPRV_LENGTHOF(data)); 205 } 206 207 void UCharsTrieTest::TestLongBranch() { 208 // Split-branch and interesting compact-integer values. 209 static const StringAndValue data[]={ 210 { "a", -2 }, 211 { "b", -1 }, 212 { "c", 0 }, 213 { "d2", 1 }, 214 { "f", 0x3f }, 215 { "g", 0x40 }, 216 { "h", 0x41 }, 217 { "j23", 0x1900 }, 218 { "j24", 0x19ff }, 219 { "j25", 0x1a00 }, 220 { "k2", 0x1a80 }, 221 { "k3", 0x1aff }, 222 { "l234567890", 0x1b00 }, 223 { "l234567890123", 0x1b01 }, 224 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 225 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 226 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 227 { "r", 0x333333 }, 228 { "s2345", 0x4444444 }, 229 { "t234567890", 0x77777777 }, 230 { "z", (int32_t)0x80000001 } 231 }; 232 checkData(data, UPRV_LENGTHOF(data)); 233 } 234 235 void UCharsTrieTest::TestValuesForState() { 236 // Check that saveState() and resetToState() interact properly 237 // with next() and current(). 238 static const StringAndValue data[]={ 239 { "a", -1 }, 240 { "ab", -2 }, 241 { "abc", -3 }, 242 { "abcd", -4 }, 243 { "abcde", -5 }, 244 { "abcdef", -6 } 245 }; 246 checkData(data, UPRV_LENGTHOF(data)); 247 } 248 249 void UCharsTrieTest::TestCompact() { 250 // Duplicate trailing strings and values provide opportunities for compacting. 251 static const StringAndValue data[]={ 252 { "+", 0 }, 253 { "+august", 8 }, 254 { "+december", 12 }, 255 { "+july", 7 }, 256 { "+june", 6 }, 257 { "+november", 11 }, 258 { "+october", 10 }, 259 { "+september", 9 }, 260 { "-", 0 }, 261 { "-august", 8 }, 262 { "-december", 12 }, 263 { "-july", 7 }, 264 { "-june", 6 }, 265 { "-november", 11 }, 266 { "-october", 10 }, 267 { "-september", 9 }, 268 // The l+n branch (with its sub-nodes) is a duplicate but will be written 269 // both times because each time it follows a different linear-match node. 270 { "xjuly", 7 }, 271 { "xjune", 6 } 272 }; 273 checkData(data, UPRV_LENGTHOF(data)); 274 } 275 276 void UCharsTrieTest::TestFirstForCodePoint() { 277 static const StringAndValue data[]={ 278 { "a", 1 }, 279 { "a\\ud800", 2 }, 280 { "a\\U00010000", 3 }, 281 { "\\ud840", 4 }, 282 { "\\U00020000\\udbff", 5 }, 283 { "\\U00020000\\U0010ffff", 6 }, 284 { "\\U00020000\\U0010ffffz", 7 }, 285 { "\\U00050000xy", 8 }, 286 { "\\U00050000xyz", 9 } 287 }; 288 checkData(data, UPRV_LENGTHOF(data)); 289 } 290 291 void UCharsTrieTest::TestNextForCodePoint() { 292 static const StringAndValue data[]={ 293 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, 294 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, 295 { "\\u4dff\\U000103ff", 99999 } 296 }; 297 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 298 if(trie.isNull()) { 299 return; // buildTrie() reported an error 300 } 301 UStringTrieResult result; 302 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 303 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 304 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 305 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 306 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 307 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 308 trie->getValue()!=2000000000 309 ) { 310 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); 311 } 312 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 313 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 314 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 315 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 316 trie->getValue()!=44444 317 ) { 318 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); 319 } 320 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 321 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 322 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 323 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate 324 ) { 325 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); 326 } 327 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 328 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 329 trie->getValue()!=99999 330 ) { 331 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); 332 } 333 } 334 335 // Definitions in the anonymous namespace are invisible outside this file. 336 namespace { 337 338 // Generate (string, value) pairs. 339 // The first string (before next()) will be empty. 340 class Generator { 341 public: 342 Generator() : value(4711), num(0) {} 343 void next() { 344 UChar c; 345 s.truncate(0); 346 s.append(c=(UChar)(value>>16)); 347 s.append((UChar)(value>>4)); 348 if(value&1) { 349 s.append((UChar)value); 350 } 351 set.add(c); 352 value+=((value>>5)&0x7ff)*3+1; 353 ++num; 354 } 355 const UnicodeString &getString() const { return s; } 356 int32_t getValue() const { return value; } 357 int32_t countUniqueFirstChars() const { return set.size(); } 358 int32_t getIndex() const { return num; } 359 360 private: 361 UnicodeString s; 362 UnicodeSet set; 363 int32_t value; 364 int32_t num; 365 }; 366 367 } // end namespace 368 369 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { 370 IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); 371 Generator gen; 372 builder_->clear(); 373 while(gen.countUniqueFirstChars()<numUniqueFirst) { 374 builder_->add(gen.getString(), gen.getValue(), errorCode); 375 gen.next(); 376 } 377 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); 378 UnicodeString trieUChars; 379 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 380 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 381 return new UCharsTrie(trieUChars.getBuffer()); 382 } 383 384 // Exercise a large branch node. 385 void UCharsTrieTest::TestLargeTrie() { 386 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); 387 if(trie.isNull()) { 388 return; // buildTrie() reported an error 389 } 390 Generator gen; 391 while(gen.countUniqueFirstChars()<1111) { 392 UnicodeString x(gen.getString()); 393 int32_t value=gen.getValue(); 394 if(!x.isEmpty()) { 395 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { 396 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", 397 x[0], (long)gen.getIndex()); 398 break; 399 } 400 x.remove(0, 1); 401 } 402 UStringTrieResult result=trie->next(x.getBuffer(), x.length()); 403 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { 404 errln("next(%d chars U+%04X U+%04X)!=hasValue or " 405 "next()!=current() or getValue() wrong " 406 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); 407 break; 408 } 409 gen.next(); 410 } 411 } 412 413 enum { 414 u_a=0x61, 415 u_b=0x62, 416 u_c=0x63, 417 u_j=0x6a, 418 u_n=0x6e, 419 u_r=0x72, 420 u_u=0x75, 421 u_y=0x79 422 }; 423 424 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 425 // All types of nodes leading to the same value, 426 // for code coverage of recursive functions. 427 // In particular, we need a lot of branches on some single level 428 // to exercise a split-branch node. 429 static const StringAndValue data[]={ 430 { "august", 8 }, 431 { "jan", 1 }, 432 { "jan.", 1 }, 433 { "jana", 1 }, 434 { "janbb", 1 }, 435 { "janc", 1 }, 436 { "janddd", 1 }, 437 { "janee", 1 }, 438 { "janef", 1 }, 439 { "janf", 1 }, 440 { "jangg", 1 }, 441 { "janh", 1 }, 442 { "janiiii", 1 }, 443 { "janj", 1 }, 444 { "jankk", 1 }, 445 { "jankl", 1 }, 446 { "jankmm", 1 }, 447 { "janl", 1 }, 448 { "janm", 1 }, 449 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 450 { "jano", 1 }, 451 { "janpp", 1 }, 452 { "janqqq", 1 }, 453 { "janr", 1 }, 454 { "januar", 1 }, 455 { "january", 1 }, 456 { "july", 7 }, 457 { "jun", 6 }, 458 { "jun.", 6 }, 459 { "june", 6 } 460 }; 461 return buildTrie(data, UPRV_LENGTHOF(data), buildOption); 462 } 463 464 void UCharsTrieTest::TestHasUniqueValue() { 465 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 466 if(trie.isNull()) { 467 return; // buildTrie() reported an error 468 } 469 int32_t uniqueValue; 470 if(trie->hasUniqueValue(uniqueValue)) { 471 errln("unique value at root"); 472 } 473 trie->next(u_j); 474 trie->next(u_a); 475 trie->next(u_n); 476 // hasUniqueValue() directly after next() 477 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 478 errln("not unique value 1 after \"jan\""); 479 } 480 trie->first(u_j); 481 trie->next(u_u); 482 if(trie->hasUniqueValue(uniqueValue)) { 483 errln("unique value after \"ju\""); 484 } 485 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 486 errln("not normal value 6 after \"jun\""); 487 } 488 // hasUniqueValue() after getValue() 489 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 490 errln("not unique value 6 after \"jun\""); 491 } 492 // hasUniqueValue() from within a linear-match node 493 trie->first(u_a); 494 trie->next(u_u); 495 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 496 errln("not unique value 8 after \"au\""); 497 } 498 } 499 500 void UCharsTrieTest::TestGetNextUChars() { 501 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 502 if(trie.isNull()) { 503 return; // buildTrie() reported an error 504 } 505 UnicodeString buffer; 506 UnicodeStringAppendable app(buffer); 507 int32_t count=trie->getNextUChars(app); 508 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { 509 errln("months getNextUChars()!=[aj] at root"); 510 } 511 trie->next(u_j); 512 trie->next(u_a); 513 trie->next(u_n); 514 // getNextUChars() directly after next() 515 buffer.remove(); 516 count=trie->getNextUChars(app); 517 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 518 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); 519 } 520 // getNextUChars() after getValue() 521 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 522 buffer.remove(); 523 count=trie->getNextUChars(app); 524 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 525 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 526 } 527 // getNextUChars() from a linear-match node 528 trie->next(u_u); 529 buffer.remove(); 530 count=trie->getNextUChars(app); 531 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { 532 errln("months getNextUChars()!=[a] after \"janu\""); 533 } 534 trie->next(u_a); 535 buffer.remove(); 536 count=trie->getNextUChars(app); 537 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { 538 errln("months getNextUChars()!=[r] after \"janua\""); 539 } 540 trie->next(u_r); 541 trie->next(u_y); 542 // getNextUChars() after a final match 543 buffer.remove(); 544 count=trie->getNextUChars(app); 545 if(count!=0 || buffer.length()!=0) { 546 errln("months getNextUChars()!=[] after \"january\""); 547 } 548 } 549 550 void UCharsTrieTest::TestIteratorFromBranch() { 551 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 552 if(trie.isNull()) { 553 return; // buildTrie() reported an error 554 } 555 // Go to a branch node. 556 trie->next(u_j); 557 trie->next(u_a); 558 trie->next(u_n); 559 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 560 UCharsTrie::Iterator iter(*trie, 0, errorCode); 561 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 562 return; 563 } 564 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 565 // following "jan". 566 static const StringAndValue data[]={ 567 { "", 1 }, 568 { ".", 1 }, 569 { "a", 1 }, 570 { "bb", 1 }, 571 { "c", 1 }, 572 { "ddd", 1 }, 573 { "ee", 1 }, 574 { "ef", 1 }, 575 { "f", 1 }, 576 { "gg", 1 }, 577 { "h", 1 }, 578 { "iiii", 1 }, 579 { "j", 1 }, 580 { "kk", 1 }, 581 { "kl", 1 }, 582 { "kmm", 1 }, 583 { "l", 1 }, 584 { "m", 1 }, 585 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 586 { "o", 1 }, 587 { "pp", 1 }, 588 { "qqq", 1 }, 589 { "r", 1 }, 590 { "uar", 1 }, 591 { "uary", 1 } 592 }; 593 checkIterator(iter, data, UPRV_LENGTHOF(data)); 594 // Reset, and we should get the same result. 595 logln("after iter.reset()"); 596 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 597 } 598 599 void UCharsTrieTest::TestIteratorFromLinearMatch() { 600 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 601 if(trie.isNull()) { 602 return; // buildTrie() reported an error 603 } 604 // Go into a linear-match node. 605 trie->next(u_j); 606 trie->next(u_a); 607 trie->next(u_n); 608 trie->next(u_u); 609 trie->next(u_a); 610 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 611 UCharsTrie::Iterator iter(*trie, 0, errorCode); 612 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 613 return; 614 } 615 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 616 // following "janua". 617 static const StringAndValue data[]={ 618 { "r", 1 }, 619 { "ry", 1 } 620 }; 621 checkIterator(iter, data, UPRV_LENGTHOF(data)); 622 // Reset, and we should get the same result. 623 logln("after iter.reset()"); 624 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 625 } 626 627 void UCharsTrieTest::TestTruncatingIteratorFromRoot() { 628 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 629 if(trie.isNull()) { 630 return; // buildTrie() reported an error 631 } 632 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 633 UCharsTrie::Iterator iter(*trie, 4, errorCode); 634 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 635 return; 636 } 637 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 638 // of each string, and no string duplicates from the truncation. 639 static const StringAndValue data[]={ 640 { "augu", -1 }, 641 { "jan", 1 }, 642 { "jan.", 1 }, 643 { "jana", 1 }, 644 { "janb", -1 }, 645 { "janc", 1 }, 646 { "jand", -1 }, 647 { "jane", -1 }, 648 { "janf", 1 }, 649 { "jang", -1 }, 650 { "janh", 1 }, 651 { "jani", -1 }, 652 { "janj", 1 }, 653 { "jank", -1 }, 654 { "janl", 1 }, 655 { "janm", 1 }, 656 { "jann", -1 }, 657 { "jano", 1 }, 658 { "janp", -1 }, 659 { "janq", -1 }, 660 { "janr", 1 }, 661 { "janu", -1 }, 662 { "july", 7 }, 663 { "jun", 6 }, 664 { "jun.", 6 }, 665 { "june", 6 } 666 }; 667 checkIterator(iter, data, UPRV_LENGTHOF(data)); 668 // Reset, and we should get the same result. 669 logln("after iter.reset()"); 670 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 671 } 672 673 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 674 static const StringAndValue data[]={ 675 { "abcdef", 10 }, 676 { "abcdepq", 200 }, 677 { "abcdeyz", 3000 } 678 }; 679 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 680 if(trie.isNull()) { 681 return; // buildTrie() reported an error 682 } 683 // Go into a linear-match node. 684 trie->next(u_a); 685 trie->next(u_b); 686 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 687 // Truncate within the linear-match node. 688 UCharsTrie::Iterator iter(*trie, 2, errorCode); 689 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 690 return; 691 } 692 static const StringAndValue expected[]={ 693 { "cd", -1 } 694 }; 695 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 696 // Reset, and we should get the same result. 697 logln("after iter.reset()"); 698 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 699 } 700 701 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 702 static const StringAndValue data[]={ 703 { "abcdef", 10 }, 704 { "abcdepq", 200 }, 705 { "abcdeyz", 3000 } 706 }; 707 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 708 if(trie.isNull()) { 709 return; // buildTrie() reported an error 710 } 711 // Go into a linear-match node. 712 trie->next(u_a); 713 trie->next(u_b); 714 trie->next(u_c); 715 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 716 // Truncate after the linear-match node. 717 UCharsTrie::Iterator iter(*trie, 3, errorCode); 718 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 719 return; 720 } 721 static const StringAndValue expected[]={ 722 { "def", 10 }, 723 { "dep", -1 }, 724 { "dey", -1 } 725 }; 726 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 727 // Reset, and we should get the same result. 728 logln("after iter.reset()"); 729 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 730 } 731 732 void UCharsTrieTest::TestIteratorFromUChars() { 733 static const StringAndValue data[]={ 734 { "mm", 3 }, 735 { "mmm", 33 }, 736 { "mmnop", 333 } 737 }; 738 builder_->clear(); 739 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); 740 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { 741 builder_->add(data[i].s, data[i].value, errorCode); 742 } 743 UnicodeString trieUChars; 744 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 745 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); 746 checkIterator(iter, data, UPRV_LENGTHOF(data)); 747 } 748 749 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 750 logln("checkData(dataLength=%d, fast)", (int)dataLength); 751 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 752 logln("checkData(dataLength=%d, small)", (int)dataLength); 753 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 754 } 755 756 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 757 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); 758 if(trie.isNull()) { 759 return; // buildTrie() reported an error 760 } 761 checkFirst(*trie, data, dataLength); 762 checkNext(*trie, data, dataLength); 763 checkNextWithState(*trie, data, dataLength); 764 checkNextString(*trie, data, dataLength); 765 checkIterator(*trie, data, dataLength); 766 } 767 768 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 769 UStringTrieBuildOption buildOption) { 770 IcuTestErrorCode errorCode(*this, "buildTrie()"); 771 // Add the items to the trie builder in an interesting (not trivial, not random) order. 772 int32_t index, step; 773 if(dataLength&1) { 774 // Odd number of items. 775 index=dataLength/2; 776 step=2; 777 } else if((dataLength%3)!=0) { 778 // Not a multiple of 3. 779 index=dataLength/5; 780 step=3; 781 } else { 782 index=dataLength-1; 783 step=-1; 784 } 785 builder_->clear(); 786 for(int32_t i=0; i<dataLength; ++i) { 787 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), 788 data[index].value, errorCode); 789 index=(index+step)%dataLength; 790 } 791 UnicodeString trieUChars; 792 builder_->buildUnicodeString(buildOption, trieUChars, errorCode); 793 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); 794 if(!errorCode.logIfFailureAndReset("add()/build()")) { 795 builder_->add("zzz", 999, errorCode); 796 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 797 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 798 } 799 } 800 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 801 UnicodeString trieUChars2; 802 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); 803 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { 804 errln("builder.buildUnicodeString() before & after build() returned same array"); 805 } 806 if(errorCode.isFailure()) { 807 return NULL; 808 } 809 // Tries from either build() method should be identical but 810 // UCharsTrie does not implement equals(). 811 // We just return either one. 812 if((dataLength&1)!=0) { 813 return trie.orphan(); 814 } else { 815 return new UCharsTrie(trieUChars2.getBuffer()); 816 } 817 } 818 819 void UCharsTrieTest::checkFirst(UCharsTrie &trie, 820 const StringAndValue data[], int32_t dataLength) { 821 for(int32_t i=0; i<dataLength; ++i) { 822 if(*data[i].s==0) { 823 continue; // skip empty string 824 } 825 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 826 UChar32 c=expectedString[0]; 827 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; 828 UStringTrieResult firstResult=trie.first(c); 829 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 830 UStringTrieResult nextResult=trie.next(nextCp); 831 if(firstResult!=trie.reset().next(c) || 832 firstResult!=trie.current() || 833 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 834 nextResult!=trie.next(nextCp) 835 ) { 836 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", 837 c, data[i].s); 838 } 839 c=expectedString.char32At(0); 840 int32_t cLength=U16_LENGTH(c); 841 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; 842 firstResult=trie.firstForCodePoint(c); 843 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 844 nextResult=trie.nextForCodePoint(nextCp); 845 if(firstResult!=trie.reset().nextForCodePoint(c) || 846 firstResult!=trie.current() || 847 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 848 nextResult!=trie.nextForCodePoint(nextCp) 849 ) { 850 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", 851 c, data[i].s); 852 } 853 } 854 trie.reset(); 855 } 856 857 void UCharsTrieTest::checkNext(UCharsTrie &trie, 858 const StringAndValue data[], int32_t dataLength) { 859 UCharsTrie::State state; 860 for(int32_t i=0; i<dataLength; ++i) { 861 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 862 int32_t stringLength= (i&1) ? -1 : expectedString.length(); 863 UStringTrieResult result; 864 if( !USTRINGTRIE_HAS_VALUE( 865 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || 866 result!=trie.current() 867 ) { 868 errln("trie does not seem to contain %s", data[i].s); 869 } else if(trie.getValue()!=data[i].value) { 870 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 871 data[i].s, 872 (long)trie.getValue(), (long)trie.getValue(), 873 (long)data[i].value, (long)data[i].value); 874 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 875 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 876 } 877 trie.reset(); 878 stringLength=expectedString.length(); 879 result=trie.current(); 880 for(int32_t j=0; j<stringLength; ++j) { 881 if(!USTRINGTRIE_HAS_NEXT(result)) { 882 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 883 break; 884 } 885 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 886 trie.getValue(); 887 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 888 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 889 break; 890 } 891 } 892 result=trie.next(expectedString[j]); 893 if(!USTRINGTRIE_MATCHES(result)) { 894 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 895 break; 896 } 897 if(result!=trie.current()) { 898 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 899 break; 900 } 901 } 902 if(!USTRINGTRIE_HAS_VALUE(result)) { 903 errln("trie.next()!=hasValue at the end of %s", data[i].s); 904 continue; 905 } 906 trie.getValue(); 907 if(result!=trie.current()) { 908 errln("trie.current() != current()+getValue()+current() after end of %s", 909 data[i].s); 910 } 911 // Compare the final current() with whether next() can actually continue. 912 trie.saveState(state); 913 UBool nextContinues=FALSE; 914 for(int32_t c=0x20; c<0xe000; ++c) { 915 if(c==0x80) { 916 c=0xd800; // Check for ASCII and surrogates but not all of the BMP. 917 } 918 if(trie.resetToState(state).next(c)) { 919 nextContinues=TRUE; 920 break; 921 } 922 } 923 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 924 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 925 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 926 } 927 trie.reset(); 928 } 929 } 930 931 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, 932 const StringAndValue data[], int32_t dataLength) { 933 UCharsTrie::State noState, state; 934 for(int32_t i=0; i<dataLength; ++i) { 935 if((i&1)==0) { 936 // This should have no effect. 937 trie.resetToState(noState); 938 } 939 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 940 int32_t stringLength=expectedString.length(); 941 int32_t partialLength=stringLength/3; 942 for(int32_t j=0; j<partialLength; ++j) { 943 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 944 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 945 return; 946 } 947 } 948 trie.saveState(state); 949 UStringTrieResult resultAtState=trie.current(); 950 UStringTrieResult result; 951 int32_t valueAtState=-99; 952 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 953 valueAtState=trie.getValue(); 954 } 955 result=trie.next(0); // mismatch 956 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 957 errln("trie.next(0) matched after part of %s", data[i].s); 958 } 959 if( resultAtState!=trie.resetToState(state).current() || 960 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 961 ) { 962 errln("trie.next(part of %s) changes current()/getValue() after " 963 "saveState/next(0)/resetToState", 964 data[i].s); 965 } else if(!USTRINGTRIE_HAS_VALUE( 966 result=trie.next(expectedString.getTerminatedBuffer()+partialLength, 967 stringLength-partialLength)) || 968 result!=trie.current()) { 969 errln("trie.next(rest of %s) does not seem to contain %s after " 970 "saveState/next(0)/resetToState", 971 data[i].s, data[i].s); 972 } else if(!USTRINGTRIE_HAS_VALUE( 973 result=trie.resetToState(state). 974 next(expectedString.getTerminatedBuffer()+partialLength, 975 stringLength-partialLength)) || 976 result!=trie.current()) { 977 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 978 data[i].s); 979 } else if(trie.getValue()!=data[i].value) { 980 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 981 data[i].s, 982 (long)trie.getValue(), (long)trie.getValue(), 983 (long)data[i].value, (long)data[i].value); 984 } 985 trie.reset(); 986 } 987 } 988 989 // next(string) is also tested in other functions, 990 // but here we try to go partway through the string, and then beyond it. 991 void UCharsTrieTest::checkNextString(UCharsTrie &trie, 992 const StringAndValue data[], int32_t dataLength) { 993 for(int32_t i=0; i<dataLength; ++i) { 994 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 995 int32_t stringLength=expectedString.length(); 996 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { 997 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 998 continue; 999 } 1000 // Test that we stop properly at the end of the string. 1001 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, 1002 stringLength+1-stringLength/2)) { 1003 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 1004 } 1005 trie.reset(); 1006 } 1007 } 1008 1009 void UCharsTrieTest::checkIterator(UCharsTrie &trie, 1010 const StringAndValue data[], int32_t dataLength) { 1011 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1012 UCharsTrie::Iterator iter(trie, 0, errorCode); 1013 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { 1014 return; 1015 } 1016 checkIterator(iter, data, dataLength); 1017 } 1018 1019 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, 1020 const StringAndValue data[], int32_t dataLength) { 1021 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1022 for(int32_t i=0; i<dataLength; ++i) { 1023 if(!iter.hasNext()) { 1024 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 1025 break; 1026 } 1027 UBool hasNext=iter.next(errorCode); 1028 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 1029 break; 1030 } 1031 if(!hasNext) { 1032 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 1033 break; 1034 } 1035 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 1036 if(iter.getString()!=expectedString) { 1037 char buffer[1000]; 1038 UnicodeString invString(prettify(iter.getString())); 1039 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV); 1040 errln("trie iterator next().getString()=%s but expected %s for item %d", 1041 buffer, data[i].s, (int)i); 1042 } 1043 if(iter.getValue()!=data[i].value) { 1044 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 1045 (long)iter.getValue(), (long)iter.getValue(), 1046 (long)data[i].value, (long)data[i].value, 1047 (int)i, data[i].s); 1048 } 1049 } 1050 if(iter.hasNext()) { 1051 errln("trie iterator hasNext()=TRUE after all items"); 1052 } 1053 UBool hasNext=iter.next(errorCode); 1054 errorCode.logIfFailureAndReset("trie iterator next() after all items"); 1055 if(hasNext) { 1056 errln("trie iterator next()=TRUE after all items"); 1057 } 1058 } 1059