1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: ucharstrietest.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov16 12 * created by: Markus W. Scherer 13 */ 14 15 #include <string.h> 16 17 #include "unicode/utypes.h" 18 #include "unicode/appendable.h" 19 #include "unicode/localpointer.h" 20 #include "unicode/ucharstrie.h" 21 #include "unicode/ucharstriebuilder.h" 22 #include "unicode/uniset.h" 23 #include "unicode/unistr.h" 24 #include "intltest.h" 25 #include "cmemory.h" 26 27 struct StringAndValue { 28 const char *s; 29 int32_t value; 30 }; 31 32 class UCharsTrieTest : public IntlTest { 33 public: 34 UCharsTrieTest(); 35 virtual ~UCharsTrieTest(); 36 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 38 void TestBuilder(); 39 void TestEmpty(); 40 void Test_a(); 41 void Test_a_ab(); 42 void TestShortestBranch(); 43 void TestBranches(); 44 void TestLongSequence(); 45 void TestLongBranch(); 46 void TestValuesForState(); 47 void TestCompact(); 48 void TestFirstForCodePoint(); 49 void TestNextForCodePoint(); 50 51 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); 52 void TestLargeTrie(); 53 54 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 55 void TestHasUniqueValue(); 56 void TestGetNextUChars(); 57 void TestIteratorFromBranch(); 58 void TestIteratorFromLinearMatch(); 59 void TestTruncatingIteratorFromRoot(); 60 void TestTruncatingIteratorFromLinearMatchShort(); 61 void TestTruncatingIteratorFromLinearMatchLong(); 62 void TestIteratorFromUChars(); 63 64 void checkData(const StringAndValue data[], int32_t dataLength); 65 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 66 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 67 UStringTrieBuildOption buildOption); 68 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 69 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 70 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 71 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 72 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 73 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 74 75 private: 76 UCharsTrieBuilder *builder_; 77 }; 78 79 extern IntlTest *createUCharsTrieTest() { 80 return new UCharsTrieTest(); 81 } 82 83 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { 84 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); 85 builder_=new UCharsTrieBuilder(errorCode); 86 } 87 88 UCharsTrieTest::~UCharsTrieTest() { 89 delete builder_; 90 } 91 92 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 93 if(exec) { 94 logln("TestSuite UCharsTrieTest: "); 95 } 96 TESTCASE_AUTO_BEGIN; 97 TESTCASE_AUTO(TestBuilder); 98 TESTCASE_AUTO(TestEmpty); 99 TESTCASE_AUTO(Test_a); 100 TESTCASE_AUTO(Test_a_ab); 101 TESTCASE_AUTO(TestShortestBranch); 102 TESTCASE_AUTO(TestBranches); 103 TESTCASE_AUTO(TestLongSequence); 104 TESTCASE_AUTO(TestLongBranch); 105 TESTCASE_AUTO(TestValuesForState); 106 TESTCASE_AUTO(TestCompact); 107 TESTCASE_AUTO(TestFirstForCodePoint); 108 TESTCASE_AUTO(TestNextForCodePoint); 109 TESTCASE_AUTO(TestLargeTrie); 110 TESTCASE_AUTO(TestHasUniqueValue); 111 TESTCASE_AUTO(TestGetNextUChars); 112 TESTCASE_AUTO(TestIteratorFromBranch); 113 TESTCASE_AUTO(TestIteratorFromLinearMatch); 114 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 115 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 116 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 117 TESTCASE_AUTO(TestIteratorFromUChars); 118 TESTCASE_AUTO_END; 119 } 120 121 void UCharsTrieTest::TestBuilder() { 122 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 123 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 124 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 125 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 126 return; 127 } 128 // TODO: remove .build(...) once add() checks for duplicates. 129 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 130 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 131 errln("UCharsTrieBuilder.add() did not detect duplicates"); 132 return; 133 } 134 } 135 136 void UCharsTrieTest::TestEmpty() { 137 static const StringAndValue data[]={ 138 { "", 0 } 139 }; 140 checkData(data, UPRV_LENGTHOF(data)); 141 } 142 143 void UCharsTrieTest::Test_a() { 144 static const StringAndValue data[]={ 145 { "a", 1 } 146 }; 147 checkData(data, UPRV_LENGTHOF(data)); 148 } 149 150 void UCharsTrieTest::Test_a_ab() { 151 static const StringAndValue data[]={ 152 { "a", 1 }, 153 { "ab", 100 } 154 }; 155 checkData(data, UPRV_LENGTHOF(data)); 156 } 157 158 void UCharsTrieTest::TestShortestBranch() { 159 static const StringAndValue data[]={ 160 { "a", 1000 }, 161 { "b", 2000 } 162 }; 163 checkData(data, UPRV_LENGTHOF(data)); 164 } 165 166 void UCharsTrieTest::TestBranches() { 167 static const StringAndValue data[]={ 168 { "a", 0x10 }, 169 { "cc", 0x40 }, 170 { "e", 0x100 }, 171 { "ggg", 0x400 }, 172 { "i", 0x1000 }, 173 { "kkkk", 0x4000 }, 174 { "n", 0x10000 }, 175 { "ppppp", 0x40000 }, 176 { "r", 0x100000 }, 177 { "sss", 0x200000 }, 178 { "t", 0x400000 }, 179 { "uu", 0x800000 }, 180 { "vv", 0x7fffffff }, 181 { "zz", (int32_t)0x80000000 } 182 }; 183 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { 184 logln("TestBranches length=%d", (int)length); 185 checkData(data, length); 186 } 187 } 188 189 void UCharsTrieTest::TestLongSequence() { 190 static const StringAndValue data[]={ 191 { "a", -1 }, 192 // sequence of linear-match nodes 193 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 194 // more than 256 units 195 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 196 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 201 }; 202 checkData(data, UPRV_LENGTHOF(data)); 203 } 204 205 void UCharsTrieTest::TestLongBranch() { 206 // Split-branch and interesting compact-integer values. 207 static const StringAndValue data[]={ 208 { "a", -2 }, 209 { "b", -1 }, 210 { "c", 0 }, 211 { "d2", 1 }, 212 { "f", 0x3f }, 213 { "g", 0x40 }, 214 { "h", 0x41 }, 215 { "j23", 0x1900 }, 216 { "j24", 0x19ff }, 217 { "j25", 0x1a00 }, 218 { "k2", 0x1a80 }, 219 { "k3", 0x1aff }, 220 { "l234567890", 0x1b00 }, 221 { "l234567890123", 0x1b01 }, 222 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 223 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 224 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 225 { "r", 0x333333 }, 226 { "s2345", 0x4444444 }, 227 { "t234567890", 0x77777777 }, 228 { "z", (int32_t)0x80000001 } 229 }; 230 checkData(data, UPRV_LENGTHOF(data)); 231 } 232 233 void UCharsTrieTest::TestValuesForState() { 234 // Check that saveState() and resetToState() interact properly 235 // with next() and current(). 236 static const StringAndValue data[]={ 237 { "a", -1 }, 238 { "ab", -2 }, 239 { "abc", -3 }, 240 { "abcd", -4 }, 241 { "abcde", -5 }, 242 { "abcdef", -6 } 243 }; 244 checkData(data, UPRV_LENGTHOF(data)); 245 } 246 247 void UCharsTrieTest::TestCompact() { 248 // Duplicate trailing strings and values provide opportunities for compacting. 249 static const StringAndValue data[]={ 250 { "+", 0 }, 251 { "+august", 8 }, 252 { "+december", 12 }, 253 { "+july", 7 }, 254 { "+june", 6 }, 255 { "+november", 11 }, 256 { "+october", 10 }, 257 { "+september", 9 }, 258 { "-", 0 }, 259 { "-august", 8 }, 260 { "-december", 12 }, 261 { "-july", 7 }, 262 { "-june", 6 }, 263 { "-november", 11 }, 264 { "-october", 10 }, 265 { "-september", 9 }, 266 // The l+n branch (with its sub-nodes) is a duplicate but will be written 267 // both times because each time it follows a different linear-match node. 268 { "xjuly", 7 }, 269 { "xjune", 6 } 270 }; 271 checkData(data, UPRV_LENGTHOF(data)); 272 } 273 274 void UCharsTrieTest::TestFirstForCodePoint() { 275 static const StringAndValue data[]={ 276 { "a", 1 }, 277 { "a\\ud800", 2 }, 278 { "a\\U00010000", 3 }, 279 { "\\ud840", 4 }, 280 { "\\U00020000\\udbff", 5 }, 281 { "\\U00020000\\U0010ffff", 6 }, 282 { "\\U00020000\\U0010ffffz", 7 }, 283 { "\\U00050000xy", 8 }, 284 { "\\U00050000xyz", 9 } 285 }; 286 checkData(data, UPRV_LENGTHOF(data)); 287 } 288 289 void UCharsTrieTest::TestNextForCodePoint() { 290 static const StringAndValue data[]={ 291 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, 292 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, 293 { "\\u4dff\\U000103ff", 99999 } 294 }; 295 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 296 if(trie.isNull()) { 297 return; // buildTrie() reported an error 298 } 299 UStringTrieResult result; 300 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 301 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 302 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 303 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 304 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 305 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 306 trie->getValue()!=2000000000 307 ) { 308 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); 309 } 310 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 311 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 312 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 313 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 314 trie->getValue()!=44444 315 ) { 316 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); 317 } 318 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 319 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 320 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 321 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate 322 ) { 323 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); 324 } 325 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 326 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 327 trie->getValue()!=99999 328 ) { 329 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); 330 } 331 } 332 333 // Definitions in the anonymous namespace are invisible outside this file. 334 namespace { 335 336 // Generate (string, value) pairs. 337 // The first string (before next()) will be empty. 338 class Generator { 339 public: 340 Generator() : value(4711), num(0) {} 341 void next() { 342 UChar c; 343 s.truncate(0); 344 s.append(c=(UChar)(value>>16)); 345 s.append((UChar)(value>>4)); 346 if(value&1) { 347 s.append((UChar)value); 348 } 349 set.add(c); 350 value+=((value>>5)&0x7ff)*3+1; 351 ++num; 352 } 353 const UnicodeString &getString() const { return s; } 354 int32_t getValue() const { return value; } 355 int32_t countUniqueFirstChars() const { return set.size(); } 356 int32_t getIndex() const { return num; } 357 358 private: 359 UnicodeString s; 360 UnicodeSet set; 361 int32_t value; 362 int32_t num; 363 }; 364 365 } // end namespace 366 367 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { 368 IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); 369 Generator gen; 370 builder_->clear(); 371 while(gen.countUniqueFirstChars()<numUniqueFirst) { 372 builder_->add(gen.getString(), gen.getValue(), errorCode); 373 gen.next(); 374 } 375 logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); 376 UnicodeString trieUChars; 377 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 378 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 379 return new UCharsTrie(trieUChars.getBuffer()); 380 } 381 382 // Exercise a large branch node. 383 void UCharsTrieTest::TestLargeTrie() { 384 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); 385 if(trie.isNull()) { 386 return; // buildTrie() reported an error 387 } 388 Generator gen; 389 while(gen.countUniqueFirstChars()<1111) { 390 UnicodeString x(gen.getString()); 391 int32_t value=gen.getValue(); 392 if(!x.isEmpty()) { 393 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { 394 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", 395 x[0], (long)gen.getIndex()); 396 break; 397 } 398 x.remove(0, 1); 399 } 400 UStringTrieResult result=trie->next(x.getBuffer(), x.length()); 401 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { 402 errln("next(%d chars U+%04X U+%04X)!=hasValue or " 403 "next()!=current() or getValue() wrong " 404 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); 405 break; 406 } 407 gen.next(); 408 } 409 } 410 411 enum { 412 u_a=0x61, 413 u_b=0x62, 414 u_c=0x63, 415 u_j=0x6a, 416 u_n=0x6e, 417 u_r=0x72, 418 u_u=0x75, 419 u_y=0x79 420 }; 421 422 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 423 // All types of nodes leading to the same value, 424 // for code coverage of recursive functions. 425 // In particular, we need a lot of branches on some single level 426 // to exercise a split-branch node. 427 static const StringAndValue data[]={ 428 { "august", 8 }, 429 { "jan", 1 }, 430 { "jan.", 1 }, 431 { "jana", 1 }, 432 { "janbb", 1 }, 433 { "janc", 1 }, 434 { "janddd", 1 }, 435 { "janee", 1 }, 436 { "janef", 1 }, 437 { "janf", 1 }, 438 { "jangg", 1 }, 439 { "janh", 1 }, 440 { "janiiii", 1 }, 441 { "janj", 1 }, 442 { "jankk", 1 }, 443 { "jankl", 1 }, 444 { "jankmm", 1 }, 445 { "janl", 1 }, 446 { "janm", 1 }, 447 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 448 { "jano", 1 }, 449 { "janpp", 1 }, 450 { "janqqq", 1 }, 451 { "janr", 1 }, 452 { "januar", 1 }, 453 { "january", 1 }, 454 { "july", 7 }, 455 { "jun", 6 }, 456 { "jun.", 6 }, 457 { "june", 6 } 458 }; 459 return buildTrie(data, UPRV_LENGTHOF(data), buildOption); 460 } 461 462 void UCharsTrieTest::TestHasUniqueValue() { 463 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 464 if(trie.isNull()) { 465 return; // buildTrie() reported an error 466 } 467 int32_t uniqueValue; 468 if(trie->hasUniqueValue(uniqueValue)) { 469 errln("unique value at root"); 470 } 471 trie->next(u_j); 472 trie->next(u_a); 473 trie->next(u_n); 474 // hasUniqueValue() directly after next() 475 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 476 errln("not unique value 1 after \"jan\""); 477 } 478 trie->first(u_j); 479 trie->next(u_u); 480 if(trie->hasUniqueValue(uniqueValue)) { 481 errln("unique value after \"ju\""); 482 } 483 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 484 errln("not normal value 6 after \"jun\""); 485 } 486 // hasUniqueValue() after getValue() 487 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 488 errln("not unique value 6 after \"jun\""); 489 } 490 // hasUniqueValue() from within a linear-match node 491 trie->first(u_a); 492 trie->next(u_u); 493 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 494 errln("not unique value 8 after \"au\""); 495 } 496 } 497 498 void UCharsTrieTest::TestGetNextUChars() { 499 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 500 if(trie.isNull()) { 501 return; // buildTrie() reported an error 502 } 503 UnicodeString buffer; 504 UnicodeStringAppendable app(buffer); 505 int32_t count=trie->getNextUChars(app); 506 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { 507 errln("months getNextUChars()!=[aj] at root"); 508 } 509 trie->next(u_j); 510 trie->next(u_a); 511 trie->next(u_n); 512 // getNextUChars() directly after next() 513 buffer.remove(); 514 count=trie->getNextUChars(app); 515 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 516 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); 517 } 518 // getNextUChars() after getValue() 519 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 520 buffer.remove(); 521 count=trie->getNextUChars(app); 522 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 523 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 524 } 525 // getNextUChars() from a linear-match node 526 trie->next(u_u); 527 buffer.remove(); 528 count=trie->getNextUChars(app); 529 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { 530 errln("months getNextUChars()!=[a] after \"janu\""); 531 } 532 trie->next(u_a); 533 buffer.remove(); 534 count=trie->getNextUChars(app); 535 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { 536 errln("months getNextUChars()!=[r] after \"janua\""); 537 } 538 trie->next(u_r); 539 trie->next(u_y); 540 // getNextUChars() after a final match 541 buffer.remove(); 542 count=trie->getNextUChars(app); 543 if(count!=0 || buffer.length()!=0) { 544 errln("months getNextUChars()!=[] after \"january\""); 545 } 546 } 547 548 void UCharsTrieTest::TestIteratorFromBranch() { 549 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 550 if(trie.isNull()) { 551 return; // buildTrie() reported an error 552 } 553 // Go to a branch node. 554 trie->next(u_j); 555 trie->next(u_a); 556 trie->next(u_n); 557 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 558 UCharsTrie::Iterator iter(*trie, 0, errorCode); 559 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 560 return; 561 } 562 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 563 // following "jan". 564 static const StringAndValue data[]={ 565 { "", 1 }, 566 { ".", 1 }, 567 { "a", 1 }, 568 { "bb", 1 }, 569 { "c", 1 }, 570 { "ddd", 1 }, 571 { "ee", 1 }, 572 { "ef", 1 }, 573 { "f", 1 }, 574 { "gg", 1 }, 575 { "h", 1 }, 576 { "iiii", 1 }, 577 { "j", 1 }, 578 { "kk", 1 }, 579 { "kl", 1 }, 580 { "kmm", 1 }, 581 { "l", 1 }, 582 { "m", 1 }, 583 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 584 { "o", 1 }, 585 { "pp", 1 }, 586 { "qqq", 1 }, 587 { "r", 1 }, 588 { "uar", 1 }, 589 { "uary", 1 } 590 }; 591 checkIterator(iter, data, UPRV_LENGTHOF(data)); 592 // Reset, and we should get the same result. 593 logln("after iter.reset()"); 594 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 595 } 596 597 void UCharsTrieTest::TestIteratorFromLinearMatch() { 598 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 599 if(trie.isNull()) { 600 return; // buildTrie() reported an error 601 } 602 // Go into a linear-match node. 603 trie->next(u_j); 604 trie->next(u_a); 605 trie->next(u_n); 606 trie->next(u_u); 607 trie->next(u_a); 608 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 609 UCharsTrie::Iterator iter(*trie, 0, errorCode); 610 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 611 return; 612 } 613 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 614 // following "janua". 615 static const StringAndValue data[]={ 616 { "r", 1 }, 617 { "ry", 1 } 618 }; 619 checkIterator(iter, data, UPRV_LENGTHOF(data)); 620 // Reset, and we should get the same result. 621 logln("after iter.reset()"); 622 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 623 } 624 625 void UCharsTrieTest::TestTruncatingIteratorFromRoot() { 626 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 627 if(trie.isNull()) { 628 return; // buildTrie() reported an error 629 } 630 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 631 UCharsTrie::Iterator iter(*trie, 4, errorCode); 632 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 633 return; 634 } 635 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 636 // of each string, and no string duplicates from the truncation. 637 static const StringAndValue data[]={ 638 { "augu", -1 }, 639 { "jan", 1 }, 640 { "jan.", 1 }, 641 { "jana", 1 }, 642 { "janb", -1 }, 643 { "janc", 1 }, 644 { "jand", -1 }, 645 { "jane", -1 }, 646 { "janf", 1 }, 647 { "jang", -1 }, 648 { "janh", 1 }, 649 { "jani", -1 }, 650 { "janj", 1 }, 651 { "jank", -1 }, 652 { "janl", 1 }, 653 { "janm", 1 }, 654 { "jann", -1 }, 655 { "jano", 1 }, 656 { "janp", -1 }, 657 { "janq", -1 }, 658 { "janr", 1 }, 659 { "janu", -1 }, 660 { "july", 7 }, 661 { "jun", 6 }, 662 { "jun.", 6 }, 663 { "june", 6 } 664 }; 665 checkIterator(iter, data, UPRV_LENGTHOF(data)); 666 // Reset, and we should get the same result. 667 logln("after iter.reset()"); 668 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 669 } 670 671 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 672 static const StringAndValue data[]={ 673 { "abcdef", 10 }, 674 { "abcdepq", 200 }, 675 { "abcdeyz", 3000 } 676 }; 677 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 678 if(trie.isNull()) { 679 return; // buildTrie() reported an error 680 } 681 // Go into a linear-match node. 682 trie->next(u_a); 683 trie->next(u_b); 684 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 685 // Truncate within the linear-match node. 686 UCharsTrie::Iterator iter(*trie, 2, errorCode); 687 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 688 return; 689 } 690 static const StringAndValue expected[]={ 691 { "cd", -1 } 692 }; 693 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 694 // Reset, and we should get the same result. 695 logln("after iter.reset()"); 696 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 697 } 698 699 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 700 static const StringAndValue data[]={ 701 { "abcdef", 10 }, 702 { "abcdepq", 200 }, 703 { "abcdeyz", 3000 } 704 }; 705 LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 706 if(trie.isNull()) { 707 return; // buildTrie() reported an error 708 } 709 // Go into a linear-match node. 710 trie->next(u_a); 711 trie->next(u_b); 712 trie->next(u_c); 713 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 714 // Truncate after the linear-match node. 715 UCharsTrie::Iterator iter(*trie, 3, errorCode); 716 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 717 return; 718 } 719 static const StringAndValue expected[]={ 720 { "def", 10 }, 721 { "dep", -1 }, 722 { "dey", -1 } 723 }; 724 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 725 // Reset, and we should get the same result. 726 logln("after iter.reset()"); 727 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 728 } 729 730 void UCharsTrieTest::TestIteratorFromUChars() { 731 static const StringAndValue data[]={ 732 { "mm", 3 }, 733 { "mmm", 33 }, 734 { "mmnop", 333 } 735 }; 736 builder_->clear(); 737 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); 738 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { 739 builder_->add(data[i].s, data[i].value, errorCode); 740 } 741 UnicodeString trieUChars; 742 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 743 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); 744 checkIterator(iter, data, UPRV_LENGTHOF(data)); 745 } 746 747 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 748 logln("checkData(dataLength=%d, fast)", (int)dataLength); 749 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 750 logln("checkData(dataLength=%d, small)", (int)dataLength); 751 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 752 } 753 754 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 755 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); 756 if(trie.isNull()) { 757 return; // buildTrie() reported an error 758 } 759 checkFirst(*trie, data, dataLength); 760 checkNext(*trie, data, dataLength); 761 checkNextWithState(*trie, data, dataLength); 762 checkNextString(*trie, data, dataLength); 763 checkIterator(*trie, data, dataLength); 764 } 765 766 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 767 UStringTrieBuildOption buildOption) { 768 IcuTestErrorCode errorCode(*this, "buildTrie()"); 769 // Add the items to the trie builder in an interesting (not trivial, not random) order. 770 int32_t index, step; 771 if(dataLength&1) { 772 // Odd number of items. 773 index=dataLength/2; 774 step=2; 775 } else if((dataLength%3)!=0) { 776 // Not a multiple of 3. 777 index=dataLength/5; 778 step=3; 779 } else { 780 index=dataLength-1; 781 step=-1; 782 } 783 builder_->clear(); 784 for(int32_t i=0; i<dataLength; ++i) { 785 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), 786 data[index].value, errorCode); 787 index=(index+step)%dataLength; 788 } 789 UnicodeString trieUChars; 790 builder_->buildUnicodeString(buildOption, trieUChars, errorCode); 791 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); 792 if(!errorCode.logIfFailureAndReset("add()/build()")) { 793 builder_->add("zzz", 999, errorCode); 794 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 795 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 796 } 797 } 798 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 799 UnicodeString trieUChars2; 800 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); 801 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { 802 errln("builder.buildUnicodeString() before & after build() returned same array"); 803 } 804 if(errorCode.isFailure()) { 805 return NULL; 806 } 807 // Tries from either build() method should be identical but 808 // UCharsTrie does not implement equals(). 809 // We just return either one. 810 if((dataLength&1)!=0) { 811 return trie.orphan(); 812 } else { 813 return new UCharsTrie(trieUChars2.getBuffer()); 814 } 815 } 816 817 void UCharsTrieTest::checkFirst(UCharsTrie &trie, 818 const StringAndValue data[], int32_t dataLength) { 819 for(int32_t i=0; i<dataLength; ++i) { 820 if(*data[i].s==0) { 821 continue; // skip empty string 822 } 823 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 824 UChar32 c=expectedString[0]; 825 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; 826 UStringTrieResult firstResult=trie.first(c); 827 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 828 UStringTrieResult nextResult=trie.next(nextCp); 829 if(firstResult!=trie.reset().next(c) || 830 firstResult!=trie.current() || 831 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 832 nextResult!=trie.next(nextCp) 833 ) { 834 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", 835 c, data[i].s); 836 } 837 c=expectedString.char32At(0); 838 int32_t cLength=U16_LENGTH(c); 839 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; 840 firstResult=trie.firstForCodePoint(c); 841 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 842 nextResult=trie.nextForCodePoint(nextCp); 843 if(firstResult!=trie.reset().nextForCodePoint(c) || 844 firstResult!=trie.current() || 845 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 846 nextResult!=trie.nextForCodePoint(nextCp) 847 ) { 848 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", 849 c, data[i].s); 850 } 851 } 852 trie.reset(); 853 } 854 855 void UCharsTrieTest::checkNext(UCharsTrie &trie, 856 const StringAndValue data[], int32_t dataLength) { 857 UCharsTrie::State state; 858 for(int32_t i=0; i<dataLength; ++i) { 859 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 860 int32_t stringLength= (i&1) ? -1 : expectedString.length(); 861 UStringTrieResult result; 862 if( !USTRINGTRIE_HAS_VALUE( 863 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || 864 result!=trie.current() 865 ) { 866 errln("trie does not seem to contain %s", data[i].s); 867 } else if(trie.getValue()!=data[i].value) { 868 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 869 data[i].s, 870 (long)trie.getValue(), (long)trie.getValue(), 871 (long)data[i].value, (long)data[i].value); 872 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 873 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 874 } 875 trie.reset(); 876 stringLength=expectedString.length(); 877 result=trie.current(); 878 for(int32_t j=0; j<stringLength; ++j) { 879 if(!USTRINGTRIE_HAS_NEXT(result)) { 880 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 881 break; 882 } 883 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 884 trie.getValue(); 885 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 886 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 887 break; 888 } 889 } 890 result=trie.next(expectedString[j]); 891 if(!USTRINGTRIE_MATCHES(result)) { 892 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 893 break; 894 } 895 if(result!=trie.current()) { 896 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 897 break; 898 } 899 } 900 if(!USTRINGTRIE_HAS_VALUE(result)) { 901 errln("trie.next()!=hasValue at the end of %s", data[i].s); 902 continue; 903 } 904 trie.getValue(); 905 if(result!=trie.current()) { 906 errln("trie.current() != current()+getValue()+current() after end of %s", 907 data[i].s); 908 } 909 // Compare the final current() with whether next() can actually continue. 910 trie.saveState(state); 911 UBool nextContinues=FALSE; 912 for(int32_t c=0x20; c<0xe000; ++c) { 913 if(c==0x80) { 914 c=0xd800; // Check for ASCII and surrogates but not all of the BMP. 915 } 916 if(trie.resetToState(state).next(c)) { 917 nextContinues=TRUE; 918 break; 919 } 920 } 921 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 922 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 923 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 924 } 925 trie.reset(); 926 } 927 } 928 929 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, 930 const StringAndValue data[], int32_t dataLength) { 931 UCharsTrie::State noState, state; 932 for(int32_t i=0; i<dataLength; ++i) { 933 if((i&1)==0) { 934 // This should have no effect. 935 trie.resetToState(noState); 936 } 937 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 938 int32_t stringLength=expectedString.length(); 939 int32_t partialLength=stringLength/3; 940 for(int32_t j=0; j<partialLength; ++j) { 941 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 942 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 943 return; 944 } 945 } 946 trie.saveState(state); 947 UStringTrieResult resultAtState=trie.current(); 948 UStringTrieResult result; 949 int32_t valueAtState=-99; 950 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 951 valueAtState=trie.getValue(); 952 } 953 result=trie.next(0); // mismatch 954 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 955 errln("trie.next(0) matched after part of %s", data[i].s); 956 } 957 if( resultAtState!=trie.resetToState(state).current() || 958 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 959 ) { 960 errln("trie.next(part of %s) changes current()/getValue() after " 961 "saveState/next(0)/resetToState", 962 data[i].s); 963 } else if(!USTRINGTRIE_HAS_VALUE( 964 result=trie.next(expectedString.getTerminatedBuffer()+partialLength, 965 stringLength-partialLength)) || 966 result!=trie.current()) { 967 errln("trie.next(rest of %s) does not seem to contain %s after " 968 "saveState/next(0)/resetToState", 969 data[i].s, data[i].s); 970 } else if(!USTRINGTRIE_HAS_VALUE( 971 result=trie.resetToState(state). 972 next(expectedString.getTerminatedBuffer()+partialLength, 973 stringLength-partialLength)) || 974 result!=trie.current()) { 975 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 976 data[i].s); 977 } else if(trie.getValue()!=data[i].value) { 978 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 979 data[i].s, 980 (long)trie.getValue(), (long)trie.getValue(), 981 (long)data[i].value, (long)data[i].value); 982 } 983 trie.reset(); 984 } 985 } 986 987 // next(string) is also tested in other functions, 988 // but here we try to go partway through the string, and then beyond it. 989 void UCharsTrieTest::checkNextString(UCharsTrie &trie, 990 const StringAndValue data[], int32_t dataLength) { 991 for(int32_t i=0; i<dataLength; ++i) { 992 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 993 int32_t stringLength=expectedString.length(); 994 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { 995 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 996 continue; 997 } 998 // Test that we stop properly at the end of the string. 999 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, 1000 stringLength+1-stringLength/2)) { 1001 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 1002 } 1003 trie.reset(); 1004 } 1005 } 1006 1007 void UCharsTrieTest::checkIterator(UCharsTrie &trie, 1008 const StringAndValue data[], int32_t dataLength) { 1009 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1010 UCharsTrie::Iterator iter(trie, 0, errorCode); 1011 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { 1012 return; 1013 } 1014 checkIterator(iter, data, dataLength); 1015 } 1016 1017 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, 1018 const StringAndValue data[], int32_t dataLength) { 1019 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1020 for(int32_t i=0; i<dataLength; ++i) { 1021 if(!iter.hasNext()) { 1022 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 1023 break; 1024 } 1025 UBool hasNext=iter.next(errorCode); 1026 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 1027 break; 1028 } 1029 if(!hasNext) { 1030 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 1031 break; 1032 } 1033 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 1034 if(iter.getString()!=expectedString) { 1035 char buffer[1000]; 1036 UnicodeString invString(prettify(iter.getString())); 1037 invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV); 1038 errln("trie iterator next().getString()=%s but expected %s for item %d", 1039 buffer, data[i].s, (int)i); 1040 } 1041 if(iter.getValue()!=data[i].value) { 1042 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 1043 (long)iter.getValue(), (long)iter.getValue(), 1044 (long)data[i].value, (long)data[i].value, 1045 (int)i, data[i].s); 1046 } 1047 } 1048 if(iter.hasNext()) { 1049 errln("trie iterator hasNext()=TRUE after all items"); 1050 } 1051 UBool hasNext=iter.next(errorCode); 1052 errorCode.logIfFailureAndReset("trie iterator next() after all items"); 1053 if(hasNext) { 1054 errln("trie iterator next()=TRUE after all items"); 1055 } 1056 } 1057