1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: ucharstrietest.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010nov16 14 * created by: Markus W. Scherer 15 */ 16 17 #include <string.h> 18 19 #include "unicode/utypes.h" 20 #include "unicode/appendable.h" 21 #include "unicode/localpointer.h" 22 #include "unicode/ucharstrie.h" 23 #include "unicode/ucharstriebuilder.h" 24 #include "unicode/uniset.h" 25 #include "unicode/unistr.h" 26 #include "unicode/utf16.h" 27 #include "intltest.h" 28 #include "cmemory.h" 29 30 struct StringAndValue { 31 const char *s; 32 int32_t value; 33 }; 34 35 class UCharsTrieTest : public IntlTest { 36 public: 37 UCharsTrieTest(); 38 virtual ~UCharsTrieTest(); 39 40 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 41 void TestBuilder(); 42 void TestEmpty(); 43 void Test_a(); 44 void Test_a_ab(); 45 void TestShortestBranch(); 46 void TestBranches(); 47 void TestLongSequence(); 48 void TestLongBranch(); 49 void TestValuesForState(); 50 void TestCompact(); 51 void TestFirstForCodePoint(); 52 void TestNextForCodePoint(); 53 54 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); 55 void TestLargeTrie(); 56 57 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 58 void TestHasUniqueValue(); 59 void TestGetNextUChars(); 60 void TestIteratorFromBranch(); 61 void TestIteratorFromLinearMatch(); 62 void TestTruncatingIteratorFromRoot(); 63 void TestTruncatingIteratorFromLinearMatchShort(); 64 void TestTruncatingIteratorFromLinearMatchLong(); 65 void TestIteratorFromUChars(); 66 67 void checkData(const StringAndValue data[], int32_t dataLength); 68 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 69 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 70 UStringTrieBuildOption buildOption); 71 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 72 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 73 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 74 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 75 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 76 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 77 78 private: 79 UCharsTrieBuilder *builder_; 80 }; 81 82 extern IntlTest *createUCharsTrieTest() { 83 return new UCharsTrieTest(); 84 } 85 86 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { 87 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); 88 builder_=new UCharsTrieBuilder(errorCode); 89 } 90 91 UCharsTrieTest::~UCharsTrieTest() { 92 delete builder_; 93 } 94 95 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 96 if(exec) { 97 logln("TestSuite UCharsTrieTest: "); 98 } 99 TESTCASE_AUTO_BEGIN; 100 TESTCASE_AUTO(TestBuilder); 101 TESTCASE_AUTO(TestEmpty); 102 TESTCASE_AUTO(Test_a); 103 TESTCASE_AUTO(Test_a_ab); 104 TESTCASE_AUTO(TestShortestBranch); 105 TESTCASE_AUTO(TestBranches); 106 TESTCASE_AUTO(TestLongSequence); 107 TESTCASE_AUTO(TestLongBranch); 108 TESTCASE_AUTO(TestValuesForState); 109 TESTCASE_AUTO(TestCompact); 110 TESTCASE_AUTO(TestFirstForCodePoint); 111 TESTCASE_AUTO(TestNextForCodePoint); 112 TESTCASE_AUTO(TestLargeTrie); 113 TESTCASE_AUTO(TestHasUniqueValue); 114 TESTCASE_AUTO(TestGetNextUChars); 115 TESTCASE_AUTO(TestIteratorFromBranch); 116 TESTCASE_AUTO(TestIteratorFromLinearMatch); 117 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 118 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 119 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 120 TESTCASE_AUTO(TestIteratorFromUChars); 121 TESTCASE_AUTO_END; 122 } 123 124 void UCharsTrieTest::TestBuilder() { 125 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 126 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 127 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 128 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 129 return; 130 } 131 // TODO: remove .build(...) once add() checks for duplicates. 132 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 133 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 134 errln("UCharsTrieBuilder.add() did not detect duplicates"); 135 return; 136 } 137 } 138 139 void UCharsTrieTest::TestEmpty() { 140 static const StringAndValue data[]={ 141 { "", 0 } 142 }; 143 checkData(data, UPRV_LENGTHOF(data)); 144 } 145 146 void UCharsTrieTest::Test_a() { 147 static const StringAndValue data[]={ 148 { "a", 1 } 149 }; 150 checkData(data, UPRV_LENGTHOF(data)); 151 } 152 153 void UCharsTrieTest::Test_a_ab() { 154 static const StringAndValue data[]={ 155 { "a", 1 }, 156 { "ab", 100 } 157 }; 158 checkData(data, UPRV_LENGTHOF(data)); 159 } 160 161 void UCharsTrieTest::TestShortestBranch() { 162 static const StringAndValue data[]={ 163 { "a", 1000 }, 164 { "b", 2000 } 165 }; 166 checkData(data, UPRV_LENGTHOF(data)); 167 } 168 169 void UCharsTrieTest::TestBranches() { 170 static const StringAndValue data[]={ 171 { "a", 0x10 }, 172 { "cc", 0x40 }, 173 { "e", 0x100 }, 174 { "ggg", 0x400 }, 175 { "i", 0x1000 }, 176 { "kkkk", 0x4000 }, 177 { "n", 0x10000 }, 178 { "ppppp", 0x40000 }, 179 { "r", 0x100000 }, 180 { "sss", 0x200000 }, 181 { "t", 0x400000 }, 182 { "uu", 0x800000 }, 183 { "vv", 0x7fffffff }, 184 { "zz", (int32_t)0x80000000 } 185 }; 186 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { 187 logln("TestBranches length=%d", (int)length); 188 checkData(data, length); 189 } 190 } 191 192 void UCharsTrieTest::TestLongSequence() { 193 static const StringAndValue data[]={ 194 { "a", -1 }, 195 // sequence of linear-match nodes 196 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 197 // more than 256 units 198 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 202 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 203 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 204 }; 205 checkData(data, UPRV_LENGTHOF(data)); 206 } 207 208 void UCharsTrieTest::TestLongBranch() { 209 // Split-branch and interesting compact-integer values. 210 static const StringAndValue data[]={ 211 { "a", -2 }, 212 { "b", -1 }, 213 { "c", 0 }, 214 { "d2", 1 }, 215 { "f", 0x3f }, 216 { "g", 0x40 }, 217 { "h", 0x41 }, 218 { "j23", 0x1900 }, 219 { "j24", 0x19ff }, 220 { "j25", 0x1a00 }, 221 { "k2", 0x1a80 }, 222 { "k3", 0x1aff }, 223 { "l234567890", 0x1b00 }, 224 { "l234567890123", 0x1b01 }, 225 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 226 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 227 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 228 { "r", 0x333333 }, 229 { "s2345", 0x4444444 }, 230 { "t234567890", 0x77777777 }, 231 { "z", (int32_t)0x80000001 } 232 }; 233 checkData(data, UPRV_LENGTHOF(data)); 234 } 235 236 void UCharsTrieTest::TestValuesForState() { 237 // Check that saveState() and resetToState() interact properly 238 // with next() and current(). 239 static const StringAndValue data[]={ 240 { "a", -1 }, 241 { "ab", -2 }, 242 { "abc", -3 }, 243 { "abcd", -4 }, 244 { "abcde", -5 }, 245 { "abcdef", -6 } 246 }; 247 checkData(data, UPRV_LENGTHOF(data)); 248 } 249 250 void UCharsTrieTest::TestCompact() { 251 // Duplicate trailing strings and values provide opportunities for compacting. 252 static const StringAndValue data[]={ 253 { "+", 0 }, 254 { "+august", 8 }, 255 { "+december", 12 }, 256 { "+july", 7 }, 257 { "+june", 6 }, 258 { "+november", 11 }, 259 { "+october", 10 }, 260 { "+september", 9 }, 261 { "-", 0 }, 262 { "-august", 8 }, 263 { "-december", 12 }, 264 { "-july", 7 }, 265 { "-june", 6 }, 266 { "-november", 11 }, 267 { "-october", 10 }, 268 { "-september", 9 }, 269 // The l+n branch (with its sub-nodes) is a duplicate but will be written 270 // both times because each time it follows a different linear-match node. 271 { "xjuly", 7 }, 272 { "xjune", 6 } 273 }; 274 checkData(data, UPRV_LENGTHOF(data)); 275 } 276 277 void UCharsTrieTest::TestFirstForCodePoint() { 278 static const StringAndValue data[]={ 279 { "a", 1 }, 280 { "a\\ud800", 2 }, 281 { "a\\U00010000", 3 }, 282 { "\\ud840", 4 }, 283 { "\\U00020000\\udbff", 5 }, 284 { "\\U00020000\\U0010ffff", 6 }, 285 { "\\U00020000\\U0010ffffz", 7 }, 286 { "\\U00050000xy", 8 }, 287 { "\\U00050000xyz", 9 } 288 }; 289 checkData(data, UPRV_LENGTHOF(data)); 290 } 291 292 void UCharsTrieTest::TestNextForCodePoint() { 293 static const StringAndValue data[]={ 294 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, 295 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, 296 { "\\u4dff\\U000103ff", 99999 } 297 }; 298 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 299 if(trie.isNull()) { 300 return; // buildTrie() reported an error 301 } 302 UStringTrieResult result; 303 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 304 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 305 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 306 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 307 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 308 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 309 trie->getValue()!=2000000000 310 ) { 311 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); 312 } 313 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 314 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 315 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 316 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 317 trie->getValue()!=44444 318 ) { 319 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); 320 } 321 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 322 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 323 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 324 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate 325 ) { 326 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); 327 } 328 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 329 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 330 trie->getValue()!=99999 331 ) { 332 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); 333 } 334 } 335 336 // Definitions in the anonymous namespace are invisible outside this file. 337 namespace { 338 339 // Generate (string, value) pairs. 340 // The first string (before next()) will be empty. 341 class Generator { 342 public: 343 Generator() : value(4711), num(0) {} 344 void next() { 345 UChar c; 346 s.truncate(0); 347 s.append(c=(UChar)(value>>16)); 348 s.append((UChar)(value>>4)); 349 if(value&1) { 350 s.append((UChar)value); 351 } 352 set.add(c); 353 value+=((value>>5)&0x7ff)*3+1; 354 ++num; 355 } 356 const UnicodeString &getString() const { return s; } 357 int32_t getValue() const { return value; } 358 int32_t countUniqueFirstChars() const { return set.size(); } 359 int32_t getIndex() const { return num; } 360 361 private: 362 UnicodeString s; 363 UnicodeSet set; 364 int32_t value; 365 int32_t num; 366 }; 367 368 } // end namespace 369 370 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { 371 IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); 372 Generator gen; 373 builder_->clear(); 374 while(gen.countUniqueFirstChars()<numUniqueFirst) { 375 builder_->add(gen.getString(), gen.getValue(), errorCode); 376 gen.next(); 377 } 378 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); 379 UnicodeString trieUChars; 380 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 381 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 382 return new UCharsTrie(trieUChars.getBuffer()); 383 } 384 385 // Exercise a large branch node. 386 void UCharsTrieTest::TestLargeTrie() { 387 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); 388 if(trie.isNull()) { 389 return; // buildTrie() reported an error 390 } 391 Generator gen; 392 while(gen.countUniqueFirstChars()<1111) { 393 UnicodeString x(gen.getString()); 394 int32_t value=gen.getValue(); 395 if(!x.isEmpty()) { 396 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { 397 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", 398 x[0], (long)gen.getIndex()); 399 break; 400 } 401 x.remove(0, 1); 402 } 403 UStringTrieResult result=trie->next(x.getBuffer(), x.length()); 404 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { 405 errln("next(%d chars U+%04X U+%04X)!=hasValue or " 406 "next()!=current() or getValue() wrong " 407 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); 408 break; 409 } 410 gen.next(); 411 } 412 } 413 414 enum { 415 u_a=0x61, 416 u_b=0x62, 417 u_c=0x63, 418 u_j=0x6a, 419 u_n=0x6e, 420 u_r=0x72, 421 u_u=0x75, 422 u_y=0x79 423 }; 424 425 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 426 // All types of nodes leading to the same value, 427 // for code coverage of recursive functions. 428 // In particular, we need a lot of branches on some single level 429 // to exercise a split-branch node. 430 static const StringAndValue data[]={ 431 { "august", 8 }, 432 { "jan", 1 }, 433 { "jan.", 1 }, 434 { "jana", 1 }, 435 { "janbb", 1 }, 436 { "janc", 1 }, 437 { "janddd", 1 }, 438 { "janee", 1 }, 439 { "janef", 1 }, 440 { "janf", 1 }, 441 { "jangg", 1 }, 442 { "janh", 1 }, 443 { "janiiii", 1 }, 444 { "janj", 1 }, 445 { "jankk", 1 }, 446 { "jankl", 1 }, 447 { "jankmm", 1 }, 448 { "janl", 1 }, 449 { "janm", 1 }, 450 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 451 { "jano", 1 }, 452 { "janpp", 1 }, 453 { "janqqq", 1 }, 454 { "janr", 1 }, 455 { "januar", 1 }, 456 { "january", 1 }, 457 { "july", 7 }, 458 { "jun", 6 }, 459 { "jun.", 6 }, 460 { "june", 6 } 461 }; 462 return buildTrie(data, UPRV_LENGTHOF(data), buildOption); 463 } 464 465 void UCharsTrieTest::TestHasUniqueValue() { 466 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 467 if(trie.isNull()) { 468 return; // buildTrie() reported an error 469 } 470 int32_t uniqueValue; 471 if(trie->hasUniqueValue(uniqueValue)) { 472 errln("unique value at root"); 473 } 474 trie->next(u_j); 475 trie->next(u_a); 476 trie->next(u_n); 477 // hasUniqueValue() directly after next() 478 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 479 errln("not unique value 1 after \"jan\""); 480 } 481 trie->first(u_j); 482 trie->next(u_u); 483 if(trie->hasUniqueValue(uniqueValue)) { 484 errln("unique value after \"ju\""); 485 } 486 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 487 errln("not normal value 6 after \"jun\""); 488 } 489 // hasUniqueValue() after getValue() 490 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 491 errln("not unique value 6 after \"jun\""); 492 } 493 // hasUniqueValue() from within a linear-match node 494 trie->first(u_a); 495 trie->next(u_u); 496 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 497 errln("not unique value 8 after \"au\""); 498 } 499 } 500 501 void UCharsTrieTest::TestGetNextUChars() { 502 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 503 if(trie.isNull()) { 504 return; // buildTrie() reported an error 505 } 506 UnicodeString buffer; 507 UnicodeStringAppendable app(buffer); 508 int32_t count=trie->getNextUChars(app); 509 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { 510 errln("months getNextUChars()!=[aj] at root"); 511 } 512 trie->next(u_j); 513 trie->next(u_a); 514 trie->next(u_n); 515 // getNextUChars() directly after next() 516 buffer.remove(); 517 count=trie->getNextUChars(app); 518 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 519 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); 520 } 521 // getNextUChars() after getValue() 522 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 523 buffer.remove(); 524 count=trie->getNextUChars(app); 525 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 526 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 527 } 528 // getNextUChars() from a linear-match node 529 trie->next(u_u); 530 buffer.remove(); 531 count=trie->getNextUChars(app); 532 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { 533 errln("months getNextUChars()!=[a] after \"janu\""); 534 } 535 trie->next(u_a); 536 buffer.remove(); 537 count=trie->getNextUChars(app); 538 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { 539 errln("months getNextUChars()!=[r] after \"janua\""); 540 } 541 trie->next(u_r); 542 trie->next(u_y); 543 // getNextUChars() after a final match 544 buffer.remove(); 545 count=trie->getNextUChars(app); 546 if(count!=0 || buffer.length()!=0) { 547 errln("months getNextUChars()!=[] after \"january\""); 548 } 549 } 550 551 void UCharsTrieTest::TestIteratorFromBranch() { 552 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 553 if(trie.isNull()) { 554 return; // buildTrie() reported an error 555 } 556 // Go to a branch node. 557 trie->next(u_j); 558 trie->next(u_a); 559 trie->next(u_n); 560 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 561 UCharsTrie::Iterator iter(*trie, 0, errorCode); 562 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 563 return; 564 } 565 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 566 // following "jan". 567 static const StringAndValue data[]={ 568 { "", 1 }, 569 { ".", 1 }, 570 { "a", 1 }, 571 { "bb", 1 }, 572 { "c", 1 }, 573 { "ddd", 1 }, 574 { "ee", 1 }, 575 { "ef", 1 }, 576 { "f", 1 }, 577 { "gg", 1 }, 578 { "h", 1 }, 579 { "iiii", 1 }, 580 { "j", 1 }, 581 { "kk", 1 }, 582 { "kl", 1 }, 583 { "kmm", 1 }, 584 { "l", 1 }, 585 { "m", 1 }, 586 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 587 { "o", 1 }, 588 { "pp", 1 }, 589 { "qqq", 1 }, 590 { "r", 1 }, 591 { "uar", 1 }, 592 { "uary", 1 } 593 }; 594 checkIterator(iter, data, UPRV_LENGTHOF(data)); 595 // Reset, and we should get the same result. 596 logln("after iter.reset()"); 597 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 598 } 599 600 void UCharsTrieTest::TestIteratorFromLinearMatch() { 601 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 602 if(trie.isNull()) { 603 return; // buildTrie() reported an error 604 } 605 // Go into a linear-match node. 606 trie->next(u_j); 607 trie->next(u_a); 608 trie->next(u_n); 609 trie->next(u_u); 610 trie->next(u_a); 611 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 612 UCharsTrie::Iterator iter(*trie, 0, errorCode); 613 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 614 return; 615 } 616 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 617 // following "janua". 618 static const StringAndValue data[]={ 619 { "r", 1 }, 620 { "ry", 1 } 621 }; 622 checkIterator(iter, data, UPRV_LENGTHOF(data)); 623 // Reset, and we should get the same result. 624 logln("after iter.reset()"); 625 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 626 } 627 628 void UCharsTrieTest::TestTruncatingIteratorFromRoot() { 629 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 630 if(trie.isNull()) { 631 return; // buildTrie() reported an error 632 } 633 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 634 UCharsTrie::Iterator iter(*trie, 4, errorCode); 635 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 636 return; 637 } 638 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 639 // of each string, and no string duplicates from the truncation. 640 static const StringAndValue data[]={ 641 { "augu", -1 }, 642 { "jan", 1 }, 643 { "jan.", 1 }, 644 { "jana", 1 }, 645 { "janb", -1 }, 646 { "janc", 1 }, 647 { "jand", -1 }, 648 { "jane", -1 }, 649 { "janf", 1 }, 650 { "jang", -1 }, 651 { "janh", 1 }, 652 { "jani", -1 }, 653 { "janj", 1 }, 654 { "jank", -1 }, 655 { "janl", 1 }, 656 { "janm", 1 }, 657 { "jann", -1 }, 658 { "jano", 1 }, 659 { "janp", -1 }, 660 { "janq", -1 }, 661 { "janr", 1 }, 662 { "janu", -1 }, 663 { "july", 7 }, 664 { "jun", 6 }, 665 { "jun.", 6 }, 666 { "june", 6 } 667 }; 668 checkIterator(iter, data, UPRV_LENGTHOF(data)); 669 // Reset, and we should get the same result. 670 logln("after iter.reset()"); 671 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 672 } 673 674 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 675 static const StringAndValue data[]={ 676 { "abcdef", 10 }, 677 { "abcdepq", 200 }, 678 { "abcdeyz", 3000 } 679 }; 680 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 681 if(trie.isNull()) { 682 return; // buildTrie() reported an error 683 } 684 // Go into a linear-match node. 685 trie->next(u_a); 686 trie->next(u_b); 687 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 688 // Truncate within the linear-match node. 689 UCharsTrie::Iterator iter(*trie, 2, errorCode); 690 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 691 return; 692 } 693 static const StringAndValue expected[]={ 694 { "cd", -1 } 695 }; 696 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 697 // Reset, and we should get the same result. 698 logln("after iter.reset()"); 699 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 700 } 701 702 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 703 static const StringAndValue data[]={ 704 { "abcdef", 10 }, 705 { "abcdepq", 200 }, 706 { "abcdeyz", 3000 } 707 }; 708 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 709 if(trie.isNull()) { 710 return; // buildTrie() reported an error 711 } 712 // Go into a linear-match node. 713 trie->next(u_a); 714 trie->next(u_b); 715 trie->next(u_c); 716 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 717 // Truncate after the linear-match node. 718 UCharsTrie::Iterator iter(*trie, 3, errorCode); 719 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 720 return; 721 } 722 static const StringAndValue expected[]={ 723 { "def", 10 }, 724 { "dep", -1 }, 725 { "dey", -1 } 726 }; 727 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 728 // Reset, and we should get the same result. 729 logln("after iter.reset()"); 730 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 731 } 732 733 void UCharsTrieTest::TestIteratorFromUChars() { 734 static const StringAndValue data[]={ 735 { "mm", 3 }, 736 { "mmm", 33 }, 737 { "mmnop", 333 } 738 }; 739 builder_->clear(); 740 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); 741 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { 742 builder_->add(data[i].s, data[i].value, errorCode); 743 } 744 UnicodeString trieUChars; 745 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 746 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); 747 checkIterator(iter, data, UPRV_LENGTHOF(data)); 748 } 749 750 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 751 logln("checkData(dataLength=%d, fast)", (int)dataLength); 752 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 753 logln("checkData(dataLength=%d, small)", (int)dataLength); 754 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 755 } 756 757 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 758 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); 759 if(trie.isNull()) { 760 return; // buildTrie() reported an error 761 } 762 checkFirst(*trie, data, dataLength); 763 checkNext(*trie, data, dataLength); 764 checkNextWithState(*trie, data, dataLength); 765 checkNextString(*trie, data, dataLength); 766 checkIterator(*trie, data, dataLength); 767 } 768 769 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 770 UStringTrieBuildOption buildOption) { 771 IcuTestErrorCode errorCode(*this, "buildTrie()"); 772 // Add the items to the trie builder in an interesting (not trivial, not random) order. 773 int32_t index, step; 774 if(dataLength&1) { 775 // Odd number of items. 776 index=dataLength/2; 777 step=2; 778 } else if((dataLength%3)!=0) { 779 // Not a multiple of 3. 780 index=dataLength/5; 781 step=3; 782 } else { 783 index=dataLength-1; 784 step=-1; 785 } 786 builder_->clear(); 787 for(int32_t i=0; i<dataLength; ++i) { 788 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), 789 data[index].value, errorCode); 790 index=(index+step)%dataLength; 791 } 792 UnicodeString trieUChars; 793 builder_->buildUnicodeString(buildOption, trieUChars, errorCode); 794 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); 795 if(!errorCode.errIfFailureAndReset("add()/build()")) { 796 builder_->add("zzz", 999, errorCode); 797 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 798 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 799 } 800 } 801 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 802 UnicodeString trieUChars2; 803 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); 804 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { 805 errln("builder.buildUnicodeString() before & after build() returned same array"); 806 } 807 if(errorCode.isFailure()) { 808 return NULL; 809 } 810 // Tries from either build() method should be identical but 811 // UCharsTrie does not implement equals(). 812 // We just return either one. 813 if((dataLength&1)!=0) { 814 return trie.orphan(); 815 } else { 816 return new UCharsTrie(trieUChars2.getBuffer()); 817 } 818 } 819 820 void UCharsTrieTest::checkFirst(UCharsTrie &trie, 821 const StringAndValue data[], int32_t dataLength) { 822 for(int32_t i=0; i<dataLength; ++i) { 823 if(*data[i].s==0) { 824 continue; // skip empty string 825 } 826 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 827 UChar32 c=expectedString[0]; 828 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; 829 UStringTrieResult firstResult=trie.first(c); 830 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 831 UStringTrieResult nextResult=trie.next(nextCp); 832 if(firstResult!=trie.reset().next(c) || 833 firstResult!=trie.current() || 834 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 835 nextResult!=trie.next(nextCp) 836 ) { 837 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", 838 c, data[i].s); 839 } 840 c=expectedString.char32At(0); 841 int32_t cLength=U16_LENGTH(c); 842 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; 843 firstResult=trie.firstForCodePoint(c); 844 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 845 nextResult=trie.nextForCodePoint(nextCp); 846 if(firstResult!=trie.reset().nextForCodePoint(c) || 847 firstResult!=trie.current() || 848 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 849 nextResult!=trie.nextForCodePoint(nextCp) 850 ) { 851 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", 852 c, data[i].s); 853 } 854 } 855 trie.reset(); 856 } 857 858 void UCharsTrieTest::checkNext(UCharsTrie &trie, 859 const StringAndValue data[], int32_t dataLength) { 860 UCharsTrie::State state; 861 for(int32_t i=0; i<dataLength; ++i) { 862 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 863 int32_t stringLength= (i&1) ? -1 : expectedString.length(); 864 UStringTrieResult result; 865 if( !USTRINGTRIE_HAS_VALUE( 866 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || 867 result!=trie.current() 868 ) { 869 errln("trie does not seem to contain %s", data[i].s); 870 } else if(trie.getValue()!=data[i].value) { 871 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 872 data[i].s, 873 (long)trie.getValue(), (long)trie.getValue(), 874 (long)data[i].value, (long)data[i].value); 875 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 876 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 877 } 878 trie.reset(); 879 stringLength=expectedString.length(); 880 result=trie.current(); 881 for(int32_t j=0; j<stringLength; ++j) { 882 if(!USTRINGTRIE_HAS_NEXT(result)) { 883 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 884 break; 885 } 886 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 887 trie.getValue(); 888 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 889 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 890 break; 891 } 892 } 893 result=trie.next(expectedString[j]); 894 if(!USTRINGTRIE_MATCHES(result)) { 895 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 896 break; 897 } 898 if(result!=trie.current()) { 899 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 900 break; 901 } 902 } 903 if(!USTRINGTRIE_HAS_VALUE(result)) { 904 errln("trie.next()!=hasValue at the end of %s", data[i].s); 905 continue; 906 } 907 trie.getValue(); 908 if(result!=trie.current()) { 909 errln("trie.current() != current()+getValue()+current() after end of %s", 910 data[i].s); 911 } 912 // Compare the final current() with whether next() can actually continue. 913 trie.saveState(state); 914 UBool nextContinues=FALSE; 915 for(int32_t c=0x20; c<0xe000; ++c) { 916 if(c==0x80) { 917 c=0xd800; // Check for ASCII and surrogates but not all of the BMP. 918 } 919 if(trie.resetToState(state).next(c)) { 920 nextContinues=TRUE; 921 break; 922 } 923 } 924 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 925 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 926 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 927 } 928 trie.reset(); 929 } 930 } 931 932 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, 933 const StringAndValue data[], int32_t dataLength) { 934 UCharsTrie::State noState, state; 935 for(int32_t i=0; i<dataLength; ++i) { 936 if((i&1)==0) { 937 // This should have no effect. 938 trie.resetToState(noState); 939 } 940 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 941 int32_t stringLength=expectedString.length(); 942 int32_t partialLength=stringLength/3; 943 for(int32_t j=0; j<partialLength; ++j) { 944 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 945 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 946 return; 947 } 948 } 949 trie.saveState(state); 950 UStringTrieResult resultAtState=trie.current(); 951 UStringTrieResult result; 952 int32_t valueAtState=-99; 953 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 954 valueAtState=trie.getValue(); 955 } 956 result=trie.next(0); // mismatch 957 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 958 errln("trie.next(0) matched after part of %s", data[i].s); 959 } 960 if( resultAtState!=trie.resetToState(state).current() || 961 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 962 ) { 963 errln("trie.next(part of %s) changes current()/getValue() after " 964 "saveState/next(0)/resetToState", 965 data[i].s); 966 } else if(!USTRINGTRIE_HAS_VALUE( 967 result=trie.next(expectedString.getTerminatedBuffer()+partialLength, 968 stringLength-partialLength)) || 969 result!=trie.current()) { 970 errln("trie.next(rest of %s) does not seem to contain %s after " 971 "saveState/next(0)/resetToState", 972 data[i].s, data[i].s); 973 } else if(!USTRINGTRIE_HAS_VALUE( 974 result=trie.resetToState(state). 975 next(expectedString.getTerminatedBuffer()+partialLength, 976 stringLength-partialLength)) || 977 result!=trie.current()) { 978 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 979 data[i].s); 980 } else if(trie.getValue()!=data[i].value) { 981 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 982 data[i].s, 983 (long)trie.getValue(), (long)trie.getValue(), 984 (long)data[i].value, (long)data[i].value); 985 } 986 trie.reset(); 987 } 988 } 989 990 // next(string) is also tested in other functions, 991 // but here we try to go partway through the string, and then beyond it. 992 void UCharsTrieTest::checkNextString(UCharsTrie &trie, 993 const StringAndValue data[], int32_t dataLength) { 994 for(int32_t i=0; i<dataLength; ++i) { 995 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 996 int32_t stringLength=expectedString.length(); 997 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { 998 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 999 continue; 1000 } 1001 // Test that we stop properly at the end of the string. 1002 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, 1003 stringLength+1-stringLength/2)) { 1004 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 1005 } 1006 trie.reset(); 1007 } 1008 } 1009 1010 void UCharsTrieTest::checkIterator(UCharsTrie &trie, 1011 const StringAndValue data[], int32_t dataLength) { 1012 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1013 UCharsTrie::Iterator iter(trie, 0, errorCode); 1014 if(errorCode.errIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { 1015 return; 1016 } 1017 checkIterator(iter, data, dataLength); 1018 } 1019 1020 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, 1021 const StringAndValue data[], int32_t dataLength) { 1022 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1023 for(int32_t i=0; i<dataLength; ++i) { 1024 if(!iter.hasNext()) { 1025 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 1026 break; 1027 } 1028 UBool hasNext=iter.next(errorCode); 1029 if(errorCode.errIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 1030 break; 1031 } 1032 if(!hasNext) { 1033 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 1034 break; 1035 } 1036 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 1037 if(iter.getString()!=expectedString) { 1038 char buffer[1000]; 1039 UnicodeString invString(prettify(iter.getString())); 1040 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV); 1041 errln("trie iterator next().getString()=%s but expected %s for item %d", 1042 buffer, data[i].s, (int)i); 1043 } 1044 if(iter.getValue()!=data[i].value) { 1045 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 1046 (long)iter.getValue(), (long)iter.getValue(), 1047 (long)data[i].value, (long)data[i].value, 1048 (int)i, data[i].s); 1049 } 1050 } 1051 if(iter.hasNext()) { 1052 errln("trie iterator hasNext()=TRUE after all items"); 1053 } 1054 UBool hasNext=iter.next(errorCode); 1055 errorCode.errIfFailureAndReset("trie iterator next() after all items"); 1056 if(hasNext) { 1057 errln("trie iterator next()=TRUE after all items"); 1058 } 1059 } 1060