1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: ucharstrietest.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov16 12 * created by: Markus W. Scherer 13 */ 14 15 #include <string.h> 16 17 #include "unicode/utypes.h" 18 #include "unicode/appendable.h" 19 #include "unicode/localpointer.h" 20 #include "unicode/ucharstrie.h" 21 #include "unicode/ucharstriebuilder.h" 22 #include "unicode/uniset.h" 23 #include "unicode/unistr.h" 24 #include "intltest.h" 25 26 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 27 28 struct StringAndValue { 29 const char *s; 30 int32_t value; 31 }; 32 33 class UCharsTrieTest : public IntlTest { 34 public: 35 UCharsTrieTest(); 36 virtual ~UCharsTrieTest(); 37 38 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 39 void TestBuilder(); 40 void TestEmpty(); 41 void Test_a(); 42 void Test_a_ab(); 43 void TestShortestBranch(); 44 void TestBranches(); 45 void TestLongSequence(); 46 void TestLongBranch(); 47 void TestValuesForState(); 48 void TestCompact(); 49 void TestFirstForCodePoint(); 50 void TestNextForCodePoint(); 51 52 UCharsTrie *buildLargeTrie(int32_t numUniqueFirst); 53 void TestLargeTrie(); 54 55 UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 56 void TestHasUniqueValue(); 57 void TestGetNextUChars(); 58 void TestIteratorFromBranch(); 59 void TestIteratorFromLinearMatch(); 60 void TestTruncatingIteratorFromRoot(); 61 void TestTruncatingIteratorFromLinearMatchShort(); 62 void TestTruncatingIteratorFromLinearMatchLong(); 63 void TestIteratorFromUChars(); 64 65 void checkData(const StringAndValue data[], int32_t dataLength); 66 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 67 UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 68 UStringTrieBuildOption buildOption); 69 void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 70 void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 71 void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 72 void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 73 void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength); 74 void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 75 76 private: 77 UCharsTrieBuilder *builder_; 78 }; 79 80 extern IntlTest *createUCharsTrieTest() { 81 return new UCharsTrieTest(); 82 } 83 84 UCharsTrieTest::UCharsTrieTest() : builder_(NULL) { 85 IcuTestErrorCode errorCode(*this, "UCharsTrieTest()"); 86 builder_=new UCharsTrieBuilder(errorCode); 87 } 88 89 UCharsTrieTest::~UCharsTrieTest() { 90 delete builder_; 91 } 92 93 void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 94 if(exec) { 95 logln("TestSuite UCharsTrieTest: "); 96 } 97 TESTCASE_AUTO_BEGIN; 98 TESTCASE_AUTO(TestBuilder); 99 TESTCASE_AUTO(TestEmpty); 100 TESTCASE_AUTO(Test_a); 101 TESTCASE_AUTO(Test_a_ab); 102 TESTCASE_AUTO(TestShortestBranch); 103 TESTCASE_AUTO(TestBranches); 104 TESTCASE_AUTO(TestLongSequence); 105 TESTCASE_AUTO(TestLongBranch); 106 TESTCASE_AUTO(TestValuesForState); 107 TESTCASE_AUTO(TestCompact); 108 TESTCASE_AUTO(TestFirstForCodePoint); 109 TESTCASE_AUTO(TestNextForCodePoint); 110 TESTCASE_AUTO(TestLargeTrie); 111 TESTCASE_AUTO(TestHasUniqueValue); 112 TESTCASE_AUTO(TestGetNextUChars); 113 TESTCASE_AUTO(TestIteratorFromBranch); 114 TESTCASE_AUTO(TestIteratorFromLinearMatch); 115 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 116 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 117 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 118 TESTCASE_AUTO(TestIteratorFromUChars); 119 TESTCASE_AUTO_END; 120 } 121 122 void UCharsTrieTest::TestBuilder() { 123 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 124 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 125 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 126 errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 127 return; 128 } 129 // TODO: remove .build(...) once add() checks for duplicates. 130 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 131 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 132 errln("UCharsTrieBuilder.add() did not detect duplicates"); 133 return; 134 } 135 } 136 137 void UCharsTrieTest::TestEmpty() { 138 static const StringAndValue data[]={ 139 { "", 0 } 140 }; 141 checkData(data, LENGTHOF(data)); 142 } 143 144 void UCharsTrieTest::Test_a() { 145 static const StringAndValue data[]={ 146 { "a", 1 } 147 }; 148 checkData(data, LENGTHOF(data)); 149 } 150 151 void UCharsTrieTest::Test_a_ab() { 152 static const StringAndValue data[]={ 153 { "a", 1 }, 154 { "ab", 100 } 155 }; 156 checkData(data, LENGTHOF(data)); 157 } 158 159 void UCharsTrieTest::TestShortestBranch() { 160 static const StringAndValue data[]={ 161 { "a", 1000 }, 162 { "b", 2000 } 163 }; 164 checkData(data, LENGTHOF(data)); 165 } 166 167 void UCharsTrieTest::TestBranches() { 168 static const StringAndValue data[]={ 169 { "a", 0x10 }, 170 { "cc", 0x40 }, 171 { "e", 0x100 }, 172 { "ggg", 0x400 }, 173 { "i", 0x1000 }, 174 { "kkkk", 0x4000 }, 175 { "n", 0x10000 }, 176 { "ppppp", 0x40000 }, 177 { "r", 0x100000 }, 178 { "sss", 0x200000 }, 179 { "t", 0x400000 }, 180 { "uu", 0x800000 }, 181 { "vv", 0x7fffffff }, 182 { "zz", (int32_t)0x80000000 } 183 }; 184 for(int32_t length=2; length<=LENGTHOF(data); ++length) { 185 infoln("TestBranches length=%d", (int)length); 186 checkData(data, length); 187 } 188 } 189 190 void UCharsTrieTest::TestLongSequence() { 191 static const StringAndValue data[]={ 192 { "a", -1 }, 193 // sequence of linear-match nodes 194 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 195 // more than 256 units 196 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 197 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 198 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 199 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 200 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 201 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 202 }; 203 checkData(data, LENGTHOF(data)); 204 } 205 206 void UCharsTrieTest::TestLongBranch() { 207 // Split-branch and interesting compact-integer values. 208 static const StringAndValue data[]={ 209 { "a", -2 }, 210 { "b", -1 }, 211 { "c", 0 }, 212 { "d2", 1 }, 213 { "f", 0x3f }, 214 { "g", 0x40 }, 215 { "h", 0x41 }, 216 { "j23", 0x1900 }, 217 { "j24", 0x19ff }, 218 { "j25", 0x1a00 }, 219 { "k2", 0x1a80 }, 220 { "k3", 0x1aff }, 221 { "l234567890", 0x1b00 }, 222 { "l234567890123", 0x1b01 }, 223 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 224 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 225 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 226 { "r", 0x333333 }, 227 { "s2345", 0x4444444 }, 228 { "t234567890", 0x77777777 }, 229 { "z", (int32_t)0x80000001 } 230 }; 231 checkData(data, LENGTHOF(data)); 232 } 233 234 void UCharsTrieTest::TestValuesForState() { 235 // Check that saveState() and resetToState() interact properly 236 // with next() and current(). 237 static const StringAndValue data[]={ 238 { "a", -1 }, 239 { "ab", -2 }, 240 { "abc", -3 }, 241 { "abcd", -4 }, 242 { "abcde", -5 }, 243 { "abcdef", -6 } 244 }; 245 checkData(data, LENGTHOF(data)); 246 } 247 248 void UCharsTrieTest::TestCompact() { 249 // Duplicate trailing strings and values provide opportunities for compacting. 250 static const StringAndValue data[]={ 251 { "+", 0 }, 252 { "+august", 8 }, 253 { "+december", 12 }, 254 { "+july", 7 }, 255 { "+june", 6 }, 256 { "+november", 11 }, 257 { "+october", 10 }, 258 { "+september", 9 }, 259 { "-", 0 }, 260 { "-august", 8 }, 261 { "-december", 12 }, 262 { "-july", 7 }, 263 { "-june", 6 }, 264 { "-november", 11 }, 265 { "-october", 10 }, 266 { "-september", 9 }, 267 // The l+n branch (with its sub-nodes) is a duplicate but will be written 268 // both times because each time it follows a different linear-match node. 269 { "xjuly", 7 }, 270 { "xjune", 6 } 271 }; 272 checkData(data, LENGTHOF(data)); 273 } 274 275 void UCharsTrieTest::TestFirstForCodePoint() { 276 static const StringAndValue data[]={ 277 { "a", 1 }, 278 { "a\\ud800", 2 }, 279 { "a\\U00010000", 3 }, 280 { "\\ud840", 4 }, 281 { "\\U00020000\\udbff", 5 }, 282 { "\\U00020000\\U0010ffff", 6 }, 283 { "\\U00020000\\U0010ffffz", 7 }, 284 { "\\U00050000xy", 8 }, 285 { "\\U00050000xyz", 9 } 286 }; 287 checkData(data, LENGTHOF(data)); 288 } 289 290 void UCharsTrieTest::TestNextForCodePoint() { 291 static const StringAndValue data[]={ 292 { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 }, 293 { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 }, 294 { "\\u4dff\\U000103ff", 99999 } 295 }; 296 LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 297 if(trie.isNull()) { 298 return; // buildTrie() reported an error 299 } 300 UStringTrieResult result; 301 if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 302 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 303 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 304 (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 305 (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 306 (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 307 trie->getValue()!=2000000000 308 ) { 309 errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s); 310 } 311 if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 312 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 313 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 314 (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 315 trie->getValue()!=44444 316 ) { 317 errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s); 318 } 319 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 320 (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 321 (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 322 (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current() // no match for trail surrogate 323 ) { 324 errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222"); 325 } 326 if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() || 327 (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() || 328 trie->getValue()!=99999 329 ) { 330 errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s); 331 } 332 } 333 334 // Definitions in the anonymous namespace are invisible outside this file. 335 namespace { 336 337 // Generate (string, value) pairs. 338 // The first string (before next()) will be empty. 339 class Generator { 340 public: 341 Generator() : value(4711), num(0) {} 342 void next() { 343 UChar c; 344 s.truncate(0); 345 s.append(c=(UChar)(value>>16)); 346 s.append((UChar)(value>>4)); 347 if(value&1) { 348 s.append((UChar)value); 349 } 350 set.add(c); 351 value+=((value>>5)&0x7ff)*3+1; 352 ++num; 353 } 354 const UnicodeString &getString() const { return s; } 355 int32_t getValue() const { return value; } 356 int32_t countUniqueFirstChars() const { return set.size(); } 357 int32_t getIndex() const { return num; } 358 359 private: 360 UnicodeString s; 361 UnicodeSet set; 362 int32_t value; 363 int32_t num; 364 }; 365 366 } // end namespace 367 368 UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) { 369 IcuTestErrorCode errorCode(*this, "buildLargeTrie()"); 370 Generator gen; 371 builder_->clear(); 372 while(gen.countUniqueFirstChars()<numUniqueFirst) { 373 builder_->add(gen.getString(), gen.getValue(), errorCode); 374 gen.next(); 375 } 376 infoln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex()); 377 UnicodeString trieUChars; 378 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 379 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 380 return new UCharsTrie(trieUChars.getBuffer()); 381 } 382 383 // Exercise a large branch node. 384 void UCharsTrieTest::TestLargeTrie() { 385 LocalPointer<UCharsTrie> trie(buildLargeTrie(1111)); 386 if(trie.isNull()) { 387 return; // buildTrie() reported an error 388 } 389 Generator gen; 390 while(gen.countUniqueFirstChars()<1111) { 391 UnicodeString x(gen.getString()); 392 int32_t value=gen.getValue(); 393 if(!x.isEmpty()) { 394 if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) { 395 errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n", 396 x[0], (long)gen.getIndex()); 397 break; 398 } 399 x.remove(0, 1); 400 } 401 UStringTrieResult result=trie->next(x.getBuffer(), x.length()); 402 if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) { 403 errln("next(%d chars U+%04X U+%04X)!=hasValue or " 404 "next()!=current() or getValue() wrong " 405 "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex()); 406 break; 407 } 408 gen.next(); 409 } 410 } 411 412 enum { 413 u_a=0x61, 414 u_b=0x62, 415 u_c=0x63, 416 u_j=0x6a, 417 u_n=0x6e, 418 u_r=0x72, 419 u_u=0x75, 420 u_y=0x79 421 }; 422 423 UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 424 // All types of nodes leading to the same value, 425 // for code coverage of recursive functions. 426 // In particular, we need a lot of branches on some single level 427 // to exercise a split-branch node. 428 static const StringAndValue data[]={ 429 { "august", 8 }, 430 { "jan", 1 }, 431 { "jan.", 1 }, 432 { "jana", 1 }, 433 { "janbb", 1 }, 434 { "janc", 1 }, 435 { "janddd", 1 }, 436 { "janee", 1 }, 437 { "janef", 1 }, 438 { "janf", 1 }, 439 { "jangg", 1 }, 440 { "janh", 1 }, 441 { "janiiii", 1 }, 442 { "janj", 1 }, 443 { "jankk", 1 }, 444 { "jankl", 1 }, 445 { "jankmm", 1 }, 446 { "janl", 1 }, 447 { "janm", 1 }, 448 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 449 { "jano", 1 }, 450 { "janpp", 1 }, 451 { "janqqq", 1 }, 452 { "janr", 1 }, 453 { "januar", 1 }, 454 { "january", 1 }, 455 { "july", 7 }, 456 { "jun", 6 }, 457 { "jun.", 6 }, 458 { "june", 6 } 459 }; 460 return buildTrie(data, LENGTHOF(data), buildOption); 461 } 462 463 void UCharsTrieTest::TestHasUniqueValue() { 464 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 465 if(trie.isNull()) { 466 return; // buildTrie() reported an error 467 } 468 int32_t uniqueValue; 469 if(trie->hasUniqueValue(uniqueValue)) { 470 errln("unique value at root"); 471 } 472 trie->next(u_j); 473 trie->next(u_a); 474 trie->next(u_n); 475 // hasUniqueValue() directly after next() 476 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 477 errln("not unique value 1 after \"jan\""); 478 } 479 trie->first(u_j); 480 trie->next(u_u); 481 if(trie->hasUniqueValue(uniqueValue)) { 482 errln("unique value after \"ju\""); 483 } 484 if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 485 errln("not normal value 6 after \"jun\""); 486 } 487 // hasUniqueValue() after getValue() 488 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 489 errln("not unique value 6 after \"jun\""); 490 } 491 // hasUniqueValue() from within a linear-match node 492 trie->first(u_a); 493 trie->next(u_u); 494 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 495 errln("not unique value 8 after \"au\""); 496 } 497 } 498 499 void UCharsTrieTest::TestGetNextUChars() { 500 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 501 if(trie.isNull()) { 502 return; // buildTrie() reported an error 503 } 504 UnicodeString buffer; 505 UnicodeStringAppendable app(buffer); 506 int32_t count=trie->getNextUChars(app); 507 if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) { 508 errln("months getNextUChars()!=[aj] at root"); 509 } 510 trie->next(u_j); 511 trie->next(u_a); 512 trie->next(u_n); 513 // getNextUChars() directly after next() 514 buffer.remove(); 515 count=trie->getNextUChars(app); 516 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 517 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\""); 518 } 519 // getNextUChars() after getValue() 520 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 521 buffer.remove(); 522 count=trie->getNextUChars(app); 523 if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) { 524 errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 525 } 526 // getNextUChars() from a linear-match node 527 trie->next(u_u); 528 buffer.remove(); 529 count=trie->getNextUChars(app); 530 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) { 531 errln("months getNextUChars()!=[a] after \"janu\""); 532 } 533 trie->next(u_a); 534 buffer.remove(); 535 count=trie->getNextUChars(app); 536 if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) { 537 errln("months getNextUChars()!=[r] after \"janua\""); 538 } 539 trie->next(u_r); 540 trie->next(u_y); 541 // getNextUChars() after a final match 542 buffer.remove(); 543 count=trie->getNextUChars(app); 544 if(count!=0 || buffer.length()!=0) { 545 errln("months getNextUChars()!=[] after \"january\""); 546 } 547 } 548 549 void UCharsTrieTest::TestIteratorFromBranch() { 550 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 551 if(trie.isNull()) { 552 return; // buildTrie() reported an error 553 } 554 // Go to a branch node. 555 trie->next(u_j); 556 trie->next(u_a); 557 trie->next(u_n); 558 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 559 UCharsTrie::Iterator iter(*trie, 0, errorCode); 560 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 561 return; 562 } 563 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 564 // following "jan". 565 static const StringAndValue data[]={ 566 { "", 1 }, 567 { ".", 1 }, 568 { "a", 1 }, 569 { "bb", 1 }, 570 { "c", 1 }, 571 { "ddd", 1 }, 572 { "ee", 1 }, 573 { "ef", 1 }, 574 { "f", 1 }, 575 { "gg", 1 }, 576 { "h", 1 }, 577 { "iiii", 1 }, 578 { "j", 1 }, 579 { "kk", 1 }, 580 { "kl", 1 }, 581 { "kmm", 1 }, 582 { "l", 1 }, 583 { "m", 1 }, 584 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 585 { "o", 1 }, 586 { "pp", 1 }, 587 { "qqq", 1 }, 588 { "r", 1 }, 589 { "uar", 1 }, 590 { "uary", 1 } 591 }; 592 checkIterator(iter, data, LENGTHOF(data)); 593 // Reset, and we should get the same result. 594 logln("after iter.reset()"); 595 checkIterator(iter.reset(), data, LENGTHOF(data)); 596 } 597 598 void UCharsTrieTest::TestIteratorFromLinearMatch() { 599 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 600 if(trie.isNull()) { 601 return; // buildTrie() reported an error 602 } 603 // Go into a linear-match node. 604 trie->next(u_j); 605 trie->next(u_a); 606 trie->next(u_n); 607 trie->next(u_u); 608 trie->next(u_a); 609 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 610 UCharsTrie::Iterator iter(*trie, 0, errorCode); 611 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 612 return; 613 } 614 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 615 // following "janua". 616 static const StringAndValue data[]={ 617 { "r", 1 }, 618 { "ry", 1 } 619 }; 620 checkIterator(iter, data, LENGTHOF(data)); 621 // Reset, and we should get the same result. 622 logln("after iter.reset()"); 623 checkIterator(iter.reset(), data, LENGTHOF(data)); 624 } 625 626 void UCharsTrieTest::TestTruncatingIteratorFromRoot() { 627 LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 628 if(trie.isNull()) { 629 return; // buildTrie() reported an error 630 } 631 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 632 UCharsTrie::Iterator iter(*trie, 4, errorCode); 633 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 634 return; 635 } 636 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 637 // of each string, and no string duplicates from the truncation. 638 static const StringAndValue data[]={ 639 { "augu", -1 }, 640 { "jan", 1 }, 641 { "jan.", 1 }, 642 { "jana", 1 }, 643 { "janb", -1 }, 644 { "janc", 1 }, 645 { "jand", -1 }, 646 { "jane", -1 }, 647 { "janf", 1 }, 648 { "jang", -1 }, 649 { "janh", 1 }, 650 { "jani", -1 }, 651 { "janj", 1 }, 652 { "jank", -1 }, 653 { "janl", 1 }, 654 { "janm", 1 }, 655 { "jann", -1 }, 656 { "jano", 1 }, 657 { "janp", -1 }, 658 { "janq", -1 }, 659 { "janr", 1 }, 660 { "janu", -1 }, 661 { "july", 7 }, 662 { "jun", 6 }, 663 { "jun.", 6 }, 664 { "june", 6 } 665 }; 666 checkIterator(iter, data, LENGTHOF(data)); 667 // Reset, and we should get the same result. 668 logln("after iter.reset()"); 669 checkIterator(iter.reset(), data, LENGTHOF(data)); 670 } 671 672 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 673 static const StringAndValue data[]={ 674 { "abcdef", 10 }, 675 { "abcdepq", 200 }, 676 { "abcdeyz", 3000 } 677 }; 678 LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 679 if(trie.isNull()) { 680 return; // buildTrie() reported an error 681 } 682 // Go into a linear-match node. 683 trie->next(u_a); 684 trie->next(u_b); 685 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 686 // Truncate within the linear-match node. 687 UCharsTrie::Iterator iter(*trie, 2, errorCode); 688 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 689 return; 690 } 691 static const StringAndValue expected[]={ 692 { "cd", -1 } 693 }; 694 checkIterator(iter, expected, LENGTHOF(expected)); 695 // Reset, and we should get the same result. 696 logln("after iter.reset()"); 697 checkIterator(iter.reset(), expected, LENGTHOF(expected)); 698 } 699 700 void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 701 static const StringAndValue data[]={ 702 { "abcdef", 10 }, 703 { "abcdepq", 200 }, 704 { "abcdeyz", 3000 } 705 }; 706 LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 707 if(trie.isNull()) { 708 return; // buildTrie() reported an error 709 } 710 // Go into a linear-match node. 711 trie->next(u_a); 712 trie->next(u_b); 713 trie->next(u_c); 714 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 715 // Truncate after the linear-match node. 716 UCharsTrie::Iterator iter(*trie, 3, errorCode); 717 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) { 718 return; 719 } 720 static const StringAndValue expected[]={ 721 { "def", 10 }, 722 { "dep", -1 }, 723 { "dey", -1 } 724 }; 725 checkIterator(iter, expected, LENGTHOF(expected)); 726 // Reset, and we should get the same result. 727 logln("after iter.reset()"); 728 checkIterator(iter.reset(), expected, LENGTHOF(expected)); 729 } 730 731 void UCharsTrieTest::TestIteratorFromUChars() { 732 static const StringAndValue data[]={ 733 { "mm", 3 }, 734 { "mmm", 33 }, 735 { "mmnop", 333 } 736 }; 737 builder_->clear(); 738 IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()"); 739 for(int32_t i=0; i<LENGTHOF(data); ++i) { 740 builder_->add(data[i].s, data[i].value, errorCode); 741 } 742 UnicodeString trieUChars; 743 builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode); 744 UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode); 745 checkIterator(iter, data, LENGTHOF(data)); 746 } 747 748 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 749 logln("checkData(dataLength=%d, fast)", (int)dataLength); 750 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 751 logln("checkData(dataLength=%d, small)", (int)dataLength); 752 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 753 } 754 755 void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 756 LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption)); 757 if(trie.isNull()) { 758 return; // buildTrie() reported an error 759 } 760 checkFirst(*trie, data, dataLength); 761 checkNext(*trie, data, dataLength); 762 checkNextWithState(*trie, data, dataLength); 763 checkNextString(*trie, data, dataLength); 764 checkIterator(*trie, data, dataLength); 765 } 766 767 UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 768 UStringTrieBuildOption buildOption) { 769 IcuTestErrorCode errorCode(*this, "buildTrie()"); 770 // Add the items to the trie builder in an interesting (not trivial, not random) order. 771 int32_t index, step; 772 if(dataLength&1) { 773 // Odd number of items. 774 index=dataLength/2; 775 step=2; 776 } else if((dataLength%3)!=0) { 777 // Not a multiple of 3. 778 index=dataLength/5; 779 step=3; 780 } else { 781 index=dataLength-1; 782 step=-1; 783 } 784 builder_->clear(); 785 for(int32_t i=0; i<dataLength; ++i) { 786 builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(), 787 data[index].value, errorCode); 788 index=(index+step)%dataLength; 789 } 790 UnicodeString trieUChars; 791 builder_->buildUnicodeString(buildOption, trieUChars, errorCode); 792 LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode)); 793 if(!errorCode.logIfFailureAndReset("add()/build()")) { 794 builder_->add("zzz", 999, errorCode); 795 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 796 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 797 } 798 } 799 logln("serialized trie size: %ld UChars\n", (long)trieUChars.length()); 800 UnicodeString trieUChars2; 801 builder_->buildUnicodeString(buildOption, trieUChars2, errorCode); 802 if(trieUChars.getBuffer()==trieUChars2.getBuffer()) { 803 errln("builder.buildUnicodeString() before & after build() returned same array"); 804 } 805 if(errorCode.isFailure()) { 806 return NULL; 807 } 808 // Tries from either build() method should be identical but 809 // UCharsTrie does not implement equals(). 810 // We just return either one. 811 if((dataLength&1)!=0) { 812 return trie.orphan(); 813 } else { 814 return new UCharsTrie(trieUChars2.getBuffer()); 815 } 816 } 817 818 void UCharsTrieTest::checkFirst(UCharsTrie &trie, 819 const StringAndValue data[], int32_t dataLength) { 820 for(int32_t i=0; i<dataLength; ++i) { 821 if(*data[i].s==0) { 822 continue; // skip empty string 823 } 824 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 825 UChar32 c=expectedString[0]; 826 UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0; 827 UStringTrieResult firstResult=trie.first(c); 828 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 829 UStringTrieResult nextResult=trie.next(nextCp); 830 if(firstResult!=trie.reset().next(c) || 831 firstResult!=trie.current() || 832 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 833 nextResult!=trie.next(nextCp) 834 ) { 835 errln("trie.first(U+%04X)!=trie.reset().next(same) for %s", 836 c, data[i].s); 837 } 838 c=expectedString.char32At(0); 839 int32_t cLength=U16_LENGTH(c); 840 nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0; 841 firstResult=trie.firstForCodePoint(c); 842 firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 843 nextResult=trie.nextForCodePoint(nextCp); 844 if(firstResult!=trie.reset().nextForCodePoint(c) || 845 firstResult!=trie.current() || 846 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 847 nextResult!=trie.nextForCodePoint(nextCp) 848 ) { 849 errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s", 850 c, data[i].s); 851 } 852 } 853 trie.reset(); 854 } 855 856 void UCharsTrieTest::checkNext(UCharsTrie &trie, 857 const StringAndValue data[], int32_t dataLength) { 858 UCharsTrie::State state; 859 for(int32_t i=0; i<dataLength; ++i) { 860 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 861 int32_t stringLength= (i&1) ? -1 : expectedString.length(); 862 UStringTrieResult result; 863 if( !USTRINGTRIE_HAS_VALUE( 864 result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) || 865 result!=trie.current() 866 ) { 867 errln("trie does not seem to contain %s", data[i].s); 868 } else if(trie.getValue()!=data[i].value) { 869 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 870 data[i].s, 871 (long)trie.getValue(), (long)trie.getValue(), 872 (long)data[i].value, (long)data[i].value); 873 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 874 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 875 } 876 trie.reset(); 877 stringLength=expectedString.length(); 878 result=trie.current(); 879 for(int32_t j=0; j<stringLength; ++j) { 880 if(!USTRINGTRIE_HAS_NEXT(result)) { 881 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 882 break; 883 } 884 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 885 trie.getValue(); 886 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 887 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 888 break; 889 } 890 } 891 result=trie.next(expectedString[j]); 892 if(!USTRINGTRIE_MATCHES(result)) { 893 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 894 break; 895 } 896 if(result!=trie.current()) { 897 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 898 break; 899 } 900 } 901 if(!USTRINGTRIE_HAS_VALUE(result)) { 902 errln("trie.next()!=hasValue at the end of %s", data[i].s); 903 continue; 904 } 905 trie.getValue(); 906 if(result!=trie.current()) { 907 errln("trie.current() != current()+getValue()+current() after end of %s", 908 data[i].s); 909 } 910 // Compare the final current() with whether next() can actually continue. 911 trie.saveState(state); 912 UBool nextContinues=FALSE; 913 for(int32_t c=0x20; c<0xe000; ++c) { 914 if(c==0x80) { 915 c=0xd800; // Check for ASCII and surrogates but not all of the BMP. 916 } 917 if(trie.resetToState(state).next(c)) { 918 nextContinues=TRUE; 919 break; 920 } 921 } 922 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 923 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 924 "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 925 } 926 trie.reset(); 927 } 928 } 929 930 void UCharsTrieTest::checkNextWithState(UCharsTrie &trie, 931 const StringAndValue data[], int32_t dataLength) { 932 UCharsTrie::State noState, state; 933 for(int32_t i=0; i<dataLength; ++i) { 934 if((i&1)==0) { 935 // This should have no effect. 936 trie.resetToState(noState); 937 } 938 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 939 int32_t stringLength=expectedString.length(); 940 int32_t partialLength=stringLength/3; 941 for(int32_t j=0; j<partialLength; ++j) { 942 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 943 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 944 return; 945 } 946 } 947 trie.saveState(state); 948 UStringTrieResult resultAtState=trie.current(); 949 UStringTrieResult result; 950 int32_t valueAtState=-99; 951 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 952 valueAtState=trie.getValue(); 953 } 954 result=trie.next(0); // mismatch 955 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 956 errln("trie.next(0) matched after part of %s", data[i].s); 957 } 958 if( resultAtState!=trie.resetToState(state).current() || 959 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 960 ) { 961 errln("trie.next(part of %s) changes current()/getValue() after " 962 "saveState/next(0)/resetToState", 963 data[i].s); 964 } else if(!USTRINGTRIE_HAS_VALUE( 965 result=trie.next(expectedString.getTerminatedBuffer()+partialLength, 966 stringLength-partialLength)) || 967 result!=trie.current()) { 968 errln("trie.next(rest of %s) does not seem to contain %s after " 969 "saveState/next(0)/resetToState", 970 data[i].s, data[i].s); 971 } else if(!USTRINGTRIE_HAS_VALUE( 972 result=trie.resetToState(state). 973 next(expectedString.getTerminatedBuffer()+partialLength, 974 stringLength-partialLength)) || 975 result!=trie.current()) { 976 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 977 data[i].s); 978 } else if(trie.getValue()!=data[i].value) { 979 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 980 data[i].s, 981 (long)trie.getValue(), (long)trie.getValue(), 982 (long)data[i].value, (long)data[i].value); 983 } 984 trie.reset(); 985 } 986 } 987 988 // next(string) is also tested in other functions, 989 // but here we try to go partway through the string, and then beyond it. 990 void UCharsTrieTest::checkNextString(UCharsTrie &trie, 991 const StringAndValue data[], int32_t dataLength) { 992 for(int32_t i=0; i<dataLength; ++i) { 993 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 994 int32_t stringLength=expectedString.length(); 995 if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) { 996 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 997 continue; 998 } 999 // Test that we stop properly at the end of the string. 1000 if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2, 1001 stringLength+1-stringLength/2)) { 1002 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 1003 } 1004 trie.reset(); 1005 } 1006 } 1007 1008 void UCharsTrieTest::checkIterator(UCharsTrie &trie, 1009 const StringAndValue data[], int32_t dataLength) { 1010 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1011 UCharsTrie::Iterator iter(trie, 0, errorCode); 1012 if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) { 1013 return; 1014 } 1015 checkIterator(iter, data, dataLength); 1016 } 1017 1018 void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter, 1019 const StringAndValue data[], int32_t dataLength) { 1020 IcuTestErrorCode errorCode(*this, "checkIterator()"); 1021 for(int32_t i=0; i<dataLength; ++i) { 1022 if(!iter.hasNext()) { 1023 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 1024 break; 1025 } 1026 UBool hasNext=iter.next(errorCode); 1027 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 1028 break; 1029 } 1030 if(!hasNext) { 1031 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 1032 break; 1033 } 1034 UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape(); 1035 if(iter.getString()!=expectedString) { 1036 char buffer[1000]; 1037 UnicodeString invString(prettify(iter.getString())); 1038 invString.extract(0, invString.length(), buffer, LENGTHOF(buffer), US_INV); 1039 errln("trie iterator next().getString()=%s but expected %s for item %d", 1040 buffer, data[i].s, (int)i); 1041 } 1042 if(iter.getValue()!=data[i].value) { 1043 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 1044 (long)iter.getValue(), (long)iter.getValue(), 1045 (long)data[i].value, (long)data[i].value, 1046 (int)i, data[i].s); 1047 } 1048 } 1049 if(iter.hasNext()) { 1050 errln("trie iterator hasNext()=TRUE after all items"); 1051 } 1052 UBool hasNext=iter.next(errorCode); 1053 errorCode.logIfFailureAndReset("trie iterator next() after all items"); 1054 if(hasNext) { 1055 errln("trie iterator next()=TRUE after all items"); 1056 } 1057 } 1058