1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: bytetrietest.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov16 12 * created by: Markus W. Scherer 13 */ 14 15 #include <string.h> 16 17 #include "unicode/utypes.h" 18 #include "unicode/bytestrie.h" 19 #include "unicode/bytestriebuilder.h" 20 #include "unicode/localpointer.h" 21 #include "unicode/stringpiece.h" 22 #include "intltest.h" 23 #include "cmemory.h" 24 25 struct StringAndValue { 26 const char *s; 27 int32_t value; 28 }; 29 30 class BytesTrieTest : public IntlTest { 31 public: 32 BytesTrieTest(); 33 virtual ~BytesTrieTest(); 34 35 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 36 void TestBuilder(); 37 void TestEmpty(); 38 void Test_a(); 39 void Test_a_ab(); 40 void TestShortestBranch(); 41 void TestBranches(); 42 void TestLongSequence(); 43 void TestLongBranch(); 44 void TestValuesForState(); 45 void TestCompact(); 46 47 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 48 void TestHasUniqueValue(); 49 void TestGetNextBytes(); 50 void TestIteratorFromBranch(); 51 void TestIteratorFromLinearMatch(); 52 void TestTruncatingIteratorFromRoot(); 53 void TestTruncatingIteratorFromLinearMatchShort(); 54 void TestTruncatingIteratorFromLinearMatchLong(); 55 void TestIteratorFromBytes(); 56 57 void checkData(const StringAndValue data[], int32_t dataLength); 58 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 59 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 60 UStringTrieBuildOption buildOption); 61 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 62 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 63 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 64 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 65 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 66 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 67 68 private: 69 BytesTrieBuilder *builder_; 70 }; 71 72 extern IntlTest *createBytesTrieTest() { 73 return new BytesTrieTest(); 74 } 75 76 BytesTrieTest::BytesTrieTest() : builder_(NULL) { 77 IcuTestErrorCode errorCode(*this, "BytesTrieTest()"); 78 builder_=new BytesTrieBuilder(errorCode); 79 } 80 81 BytesTrieTest::~BytesTrieTest() { 82 delete builder_; 83 } 84 85 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 86 if(exec) { 87 logln("TestSuite BytesTrieTest: "); 88 } 89 TESTCASE_AUTO_BEGIN; 90 TESTCASE_AUTO(TestBuilder); 91 TESTCASE_AUTO(TestEmpty); 92 TESTCASE_AUTO(Test_a); 93 TESTCASE_AUTO(Test_a_ab); 94 TESTCASE_AUTO(TestShortestBranch); 95 TESTCASE_AUTO(TestBranches); 96 TESTCASE_AUTO(TestLongSequence); 97 TESTCASE_AUTO(TestLongBranch); 98 TESTCASE_AUTO(TestValuesForState); 99 TESTCASE_AUTO(TestCompact); 100 TESTCASE_AUTO(TestHasUniqueValue); 101 TESTCASE_AUTO(TestGetNextBytes); 102 TESTCASE_AUTO(TestIteratorFromBranch); 103 TESTCASE_AUTO(TestIteratorFromLinearMatch); 104 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 105 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 106 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 107 TESTCASE_AUTO(TestIteratorFromBytes); 108 TESTCASE_AUTO_END; 109 } 110 111 void BytesTrieTest::TestBuilder() { 112 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 113 builder_->clear(); 114 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 115 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 116 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 117 return; 118 } 119 // TODO: remove .build(...) once add() checks for duplicates. 120 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 121 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 122 errln("BytesTrieBuilder.add() did not detect duplicates"); 123 return; 124 } 125 } 126 127 void BytesTrieTest::TestEmpty() { 128 static const StringAndValue data[]={ 129 { "", 0 } 130 }; 131 checkData(data, UPRV_LENGTHOF(data)); 132 } 133 134 void BytesTrieTest::Test_a() { 135 static const StringAndValue data[]={ 136 { "a", 1 } 137 }; 138 checkData(data, UPRV_LENGTHOF(data)); 139 } 140 141 void BytesTrieTest::Test_a_ab() { 142 static const StringAndValue data[]={ 143 { "a", 1 }, 144 { "ab", 100 } 145 }; 146 checkData(data, UPRV_LENGTHOF(data)); 147 } 148 149 void BytesTrieTest::TestShortestBranch() { 150 static const StringAndValue data[]={ 151 { "a", 1000 }, 152 { "b", 2000 } 153 }; 154 checkData(data, UPRV_LENGTHOF(data)); 155 } 156 157 void BytesTrieTest::TestBranches() { 158 static const StringAndValue data[]={ 159 { "a", 0x10 }, 160 { "cc", 0x40 }, 161 { "e", 0x100 }, 162 { "ggg", 0x400 }, 163 { "i", 0x1000 }, 164 { "kkkk", 0x4000 }, 165 { "n", 0x10000 }, 166 { "ppppp", 0x40000 }, 167 { "r", 0x100000 }, 168 { "sss", 0x200000 }, 169 { "t", 0x400000 }, 170 { "uu", 0x800000 }, 171 { "vv", 0x7fffffff }, 172 { "zz", (int32_t)0x80000000 } 173 }; 174 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { 175 logln("TestBranches length=%d", (int)length); 176 checkData(data, length); 177 } 178 } 179 180 void BytesTrieTest::TestLongSequence() { 181 static const StringAndValue data[]={ 182 { "a", -1 }, 183 // sequence of linear-match nodes 184 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 185 // more than 256 bytes 186 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 187 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 188 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 189 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 190 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 191 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 192 }; 193 checkData(data, UPRV_LENGTHOF(data)); 194 } 195 196 void BytesTrieTest::TestLongBranch() { 197 // Split-branch and interesting compact-integer values. 198 static const StringAndValue data[]={ 199 { "a", -2 }, 200 { "b", -1 }, 201 { "c", 0 }, 202 { "d2", 1 }, 203 { "f", 0x3f }, 204 { "g", 0x40 }, 205 { "h", 0x41 }, 206 { "j23", 0x1900 }, 207 { "j24", 0x19ff }, 208 { "j25", 0x1a00 }, 209 { "k2", 0x1a80 }, 210 { "k3", 0x1aff }, 211 { "l234567890", 0x1b00 }, 212 { "l234567890123", 0x1b01 }, 213 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 214 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 215 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 216 { "r", 0x333333 }, 217 { "s2345", 0x4444444 }, 218 { "t234567890", 0x77777777 }, 219 { "z", (int32_t)0x80000001 } 220 }; 221 checkData(data, UPRV_LENGTHOF(data)); 222 } 223 224 void BytesTrieTest::TestValuesForState() { 225 // Check that saveState() and resetToState() interact properly 226 // with next() and current(). 227 static const StringAndValue data[]={ 228 { "a", -1 }, 229 { "ab", -2 }, 230 { "abc", -3 }, 231 { "abcd", -4 }, 232 { "abcde", -5 }, 233 { "abcdef", -6 } 234 }; 235 checkData(data, UPRV_LENGTHOF(data)); 236 } 237 238 void BytesTrieTest::TestCompact() { 239 // Duplicate trailing strings and values provide opportunities for compacting. 240 static const StringAndValue data[]={ 241 { "+", 0 }, 242 { "+august", 8 }, 243 { "+december", 12 }, 244 { "+july", 7 }, 245 { "+june", 6 }, 246 { "+november", 11 }, 247 { "+october", 10 }, 248 { "+september", 9 }, 249 { "-", 0 }, 250 { "-august", 8 }, 251 { "-december", 12 }, 252 { "-july", 7 }, 253 { "-june", 6 }, 254 { "-november", 11 }, 255 { "-october", 10 }, 256 { "-september", 9 }, 257 // The l+n branch (with its sub-nodes) is a duplicate but will be written 258 // both times because each time it follows a different linear-match node. 259 { "xjuly", 7 }, 260 { "xjune", 6 } 261 }; 262 checkData(data, UPRV_LENGTHOF(data)); 263 } 264 265 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 266 // All types of nodes leading to the same value, 267 // for code coverage of recursive functions. 268 // In particular, we need a lot of branches on some single level 269 // to exercise a split-branch node. 270 static const StringAndValue data[]={ 271 { "august", 8 }, 272 { "jan", 1 }, 273 { "jan.", 1 }, 274 { "jana", 1 }, 275 { "janbb", 1 }, 276 { "janc", 1 }, 277 { "janddd", 1 }, 278 { "janee", 1 }, 279 { "janef", 1 }, 280 { "janf", 1 }, 281 { "jangg", 1 }, 282 { "janh", 1 }, 283 { "janiiii", 1 }, 284 { "janj", 1 }, 285 { "jankk", 1 }, 286 { "jankl", 1 }, 287 { "jankmm", 1 }, 288 { "janl", 1 }, 289 { "janm", 1 }, 290 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 291 { "jano", 1 }, 292 { "janpp", 1 }, 293 { "janqqq", 1 }, 294 { "janr", 1 }, 295 { "januar", 1 }, 296 { "january", 1 }, 297 { "july", 7 }, 298 { "jun", 6 }, 299 { "jun.", 6 }, 300 { "june", 6 } 301 }; 302 return buildTrie(data, UPRV_LENGTHOF(data), buildOption); 303 } 304 305 void BytesTrieTest::TestHasUniqueValue() { 306 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 307 if(trie.isNull()) { 308 return; // buildTrie() reported an error 309 } 310 int32_t uniqueValue; 311 if(trie->hasUniqueValue(uniqueValue)) { 312 errln("unique value at root"); 313 } 314 trie->next('j'); 315 trie->next('a'); 316 trie->next('n'); 317 // hasUniqueValue() directly after next() 318 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 319 errln("not unique value 1 after \"jan\""); 320 } 321 trie->first('j'); 322 trie->next('u'); 323 if(trie->hasUniqueValue(uniqueValue)) { 324 errln("unique value after \"ju\""); 325 } 326 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 327 errln("not normal value 6 after \"jun\""); 328 } 329 // hasUniqueValue() after getValue() 330 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 331 errln("not unique value 6 after \"jun\""); 332 } 333 // hasUniqueValue() from within a linear-match node 334 trie->first('a'); 335 trie->next('u'); 336 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 337 errln("not unique value 8 after \"au\""); 338 } 339 } 340 341 void BytesTrieTest::TestGetNextBytes() { 342 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 343 if(trie.isNull()) { 344 return; // buildTrie() reported an error 345 } 346 char buffer[40]; 347 CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer)); 348 int32_t count=trie->getNextBytes(sink); 349 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') { 350 errln("months getNextBytes()!=[aj] at root"); 351 } 352 trie->next('j'); 353 trie->next('a'); 354 trie->next('n'); 355 // getNextBytes() directly after next() 356 count=trie->getNextBytes(sink.Reset()); 357 buffer[count]=0; 358 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { 359 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); 360 } 361 // getNextBytes() after getValue() 362 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 363 memset(buffer, 0, sizeof(buffer)); 364 count=trie->getNextBytes(sink.Reset()); 365 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { 366 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 367 } 368 // getNextBytes() from a linear-match node 369 trie->next('u'); 370 memset(buffer, 0, sizeof(buffer)); 371 count=trie->getNextBytes(sink.Reset()); 372 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') { 373 errln("months getNextBytes()!=[a] after \"janu\""); 374 } 375 trie->next('a'); 376 memset(buffer, 0, sizeof(buffer)); 377 count=trie->getNextBytes(sink.Reset()); 378 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') { 379 errln("months getNextBytes()!=[r] after \"janua\""); 380 } 381 trie->next('r'); 382 trie->next('y'); 383 // getNextBytes() after a final match 384 count=trie->getNextBytes(sink.Reset()); 385 if(count!=0 || sink.NumberOfBytesAppended()!=0) { 386 errln("months getNextBytes()!=[] after \"january\""); 387 } 388 } 389 390 void BytesTrieTest::TestIteratorFromBranch() { 391 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 392 if(trie.isNull()) { 393 return; // buildTrie() reported an error 394 } 395 // Go to a branch node. 396 trie->next('j'); 397 trie->next('a'); 398 trie->next('n'); 399 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 400 BytesTrie::Iterator iter(*trie, 0, errorCode); 401 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 402 return; 403 } 404 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 405 // following "jan". 406 static const StringAndValue data[]={ 407 { "", 1 }, 408 { ".", 1 }, 409 { "a", 1 }, 410 { "bb", 1 }, 411 { "c", 1 }, 412 { "ddd", 1 }, 413 { "ee", 1 }, 414 { "ef", 1 }, 415 { "f", 1 }, 416 { "gg", 1 }, 417 { "h", 1 }, 418 { "iiii", 1 }, 419 { "j", 1 }, 420 { "kk", 1 }, 421 { "kl", 1 }, 422 { "kmm", 1 }, 423 { "l", 1 }, 424 { "m", 1 }, 425 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 426 { "o", 1 }, 427 { "pp", 1 }, 428 { "qqq", 1 }, 429 { "r", 1 }, 430 { "uar", 1 }, 431 { "uary", 1 } 432 }; 433 checkIterator(iter, data, UPRV_LENGTHOF(data)); 434 // Reset, and we should get the same result. 435 logln("after iter.reset()"); 436 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 437 } 438 439 void BytesTrieTest::TestIteratorFromLinearMatch() { 440 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 441 if(trie.isNull()) { 442 return; // buildTrie() reported an error 443 } 444 // Go into a linear-match node. 445 trie->next('j'); 446 trie->next('a'); 447 trie->next('n'); 448 trie->next('u'); 449 trie->next('a'); 450 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 451 BytesTrie::Iterator iter(*trie, 0, errorCode); 452 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 453 return; 454 } 455 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 456 // following "janua". 457 static const StringAndValue data[]={ 458 { "r", 1 }, 459 { "ry", 1 } 460 }; 461 checkIterator(iter, data, UPRV_LENGTHOF(data)); 462 // Reset, and we should get the same result. 463 logln("after iter.reset()"); 464 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 465 } 466 467 void BytesTrieTest::TestTruncatingIteratorFromRoot() { 468 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 469 if(trie.isNull()) { 470 return; // buildTrie() reported an error 471 } 472 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 473 BytesTrie::Iterator iter(*trie, 4, errorCode); 474 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 475 return; 476 } 477 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 478 // of each string, and no string duplicates from the truncation. 479 static const StringAndValue data[]={ 480 { "augu", -1 }, 481 { "jan", 1 }, 482 { "jan.", 1 }, 483 { "jana", 1 }, 484 { "janb", -1 }, 485 { "janc", 1 }, 486 { "jand", -1 }, 487 { "jane", -1 }, 488 { "janf", 1 }, 489 { "jang", -1 }, 490 { "janh", 1 }, 491 { "jani", -1 }, 492 { "janj", 1 }, 493 { "jank", -1 }, 494 { "janl", 1 }, 495 { "janm", 1 }, 496 { "jann", -1 }, 497 { "jano", 1 }, 498 { "janp", -1 }, 499 { "janq", -1 }, 500 { "janr", 1 }, 501 { "janu", -1 }, 502 { "july", 7 }, 503 { "jun", 6 }, 504 { "jun.", 6 }, 505 { "june", 6 } 506 }; 507 checkIterator(iter, data, UPRV_LENGTHOF(data)); 508 // Reset, and we should get the same result. 509 logln("after iter.reset()"); 510 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 511 } 512 513 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 514 static const StringAndValue data[]={ 515 { "abcdef", 10 }, 516 { "abcdepq", 200 }, 517 { "abcdeyz", 3000 } 518 }; 519 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 520 if(trie.isNull()) { 521 return; // buildTrie() reported an error 522 } 523 // Go into a linear-match node. 524 trie->next('a'); 525 trie->next('b'); 526 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 527 // Truncate within the linear-match node. 528 BytesTrie::Iterator iter(*trie, 2, errorCode); 529 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 530 return; 531 } 532 static const StringAndValue expected[]={ 533 { "cd", -1 } 534 }; 535 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 536 // Reset, and we should get the same result. 537 logln("after iter.reset()"); 538 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 539 } 540 541 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 542 static const StringAndValue data[]={ 543 { "abcdef", 10 }, 544 { "abcdepq", 200 }, 545 { "abcdeyz", 3000 } 546 }; 547 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 548 if(trie.isNull()) { 549 return; // buildTrie() reported an error 550 } 551 // Go into a linear-match node. 552 trie->next('a'); 553 trie->next('b'); 554 trie->next('c'); 555 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 556 // Truncate after the linear-match node. 557 BytesTrie::Iterator iter(*trie, 3, errorCode); 558 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 559 return; 560 } 561 static const StringAndValue expected[]={ 562 { "def", 10 }, 563 { "dep", -1 }, 564 { "dey", -1 } 565 }; 566 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 567 // Reset, and we should get the same result. 568 logln("after iter.reset()"); 569 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 570 } 571 572 void BytesTrieTest::TestIteratorFromBytes() { 573 static const StringAndValue data[]={ 574 { "mm", 3 }, 575 { "mmm", 33 }, 576 { "mmnop", 333 } 577 }; 578 builder_->clear(); 579 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()"); 580 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { 581 builder_->add(data[i].s, data[i].value, errorCode); 582 } 583 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode); 584 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode); 585 checkIterator(iter, data, UPRV_LENGTHOF(data)); 586 } 587 588 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 589 logln("checkData(dataLength=%d, fast)", (int)dataLength); 590 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 591 logln("checkData(dataLength=%d, small)", (int)dataLength); 592 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 593 } 594 595 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 596 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption)); 597 if(trie.isNull()) { 598 return; // buildTrie() reported an error 599 } 600 checkFirst(*trie, data, dataLength); 601 checkNext(*trie, data, dataLength); 602 checkNextWithState(*trie, data, dataLength); 603 checkNextString(*trie, data, dataLength); 604 checkIterator(*trie, data, dataLength); 605 } 606 607 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 608 UStringTrieBuildOption buildOption) { 609 IcuTestErrorCode errorCode(*this, "buildTrie()"); 610 // Add the items to the trie builder in an interesting (not trivial, not random) order. 611 int32_t index, step; 612 if(dataLength&1) { 613 // Odd number of items. 614 index=dataLength/2; 615 step=2; 616 } else if((dataLength%3)!=0) { 617 // Not a multiple of 3. 618 index=dataLength/5; 619 step=3; 620 } else { 621 index=dataLength-1; 622 step=-1; 623 } 624 builder_->clear(); 625 for(int32_t i=0; i<dataLength; ++i) { 626 builder_->add(data[index].s, data[index].value, errorCode); 627 index=(index+step)%dataLength; 628 } 629 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode); 630 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode)); 631 if(!errorCode.logIfFailureAndReset("add()/build()")) { 632 builder_->add("zzz", 999, errorCode); 633 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 634 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 635 } 636 } 637 logln("serialized trie size: %ld bytes\n", (long)sp.length()); 638 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode); 639 if(sp.data()==sp2.data()) { 640 errln("builder.buildStringPiece() before & after build() returned same array"); 641 } 642 if(errorCode.isFailure()) { 643 return NULL; 644 } 645 // Tries from either build() method should be identical but 646 // BytesTrie does not implement equals(). 647 // We just return either one. 648 if((dataLength&1)!=0) { 649 return trie.orphan(); 650 } else { 651 return new BytesTrie(sp2.data()); 652 } 653 } 654 655 void BytesTrieTest::checkFirst(BytesTrie &trie, 656 const StringAndValue data[], int32_t dataLength) { 657 for(int32_t i=0; i<dataLength; ++i) { 658 int c=*data[i].s; 659 if(c==0) { 660 continue; // skip empty string 661 } 662 UStringTrieResult firstResult=trie.first(c); 663 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 664 UStringTrieResult nextResult=trie.next(data[i].s[1]); 665 if(firstResult!=trie.reset().next(c) || 666 firstResult!=trie.current() || 667 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 668 nextResult!=trie.next(data[i].s[1]) 669 ) { 670 errln("trie.first(%c)!=trie.reset().next(same) for %s", 671 c, data[i].s); 672 } 673 } 674 trie.reset(); 675 } 676 677 void BytesTrieTest::checkNext(BytesTrie &trie, 678 const StringAndValue data[], int32_t dataLength) { 679 BytesTrie::State state; 680 for(int32_t i=0; i<dataLength; ++i) { 681 int32_t stringLength= (i&1) ? -1 : strlen(data[i].s); 682 UStringTrieResult result; 683 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) || 684 result!=trie.current() 685 ) { 686 errln("trie does not seem to contain %s", data[i].s); 687 } else if(trie.getValue()!=data[i].value) { 688 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 689 data[i].s, 690 (long)trie.getValue(), (long)trie.getValue(), 691 (long)data[i].value, (long)data[i].value); 692 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 693 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 694 } 695 trie.reset(); 696 stringLength=strlen(data[i].s); 697 result=trie.current(); 698 for(int32_t j=0; j<stringLength; ++j) { 699 if(!USTRINGTRIE_HAS_NEXT(result)) { 700 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 701 break; 702 } 703 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 704 trie.getValue(); 705 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 706 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 707 break; 708 } 709 } 710 result=trie.next(data[i].s[j]); 711 if(!USTRINGTRIE_MATCHES(result)) { 712 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 713 break; 714 } 715 if(result!=trie.current()) { 716 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 717 break; 718 } 719 } 720 if(!USTRINGTRIE_HAS_VALUE(result)) { 721 errln("trie.next()!=hasValue at the end of %s", data[i].s); 722 continue; 723 } 724 trie.getValue(); 725 if(result!=trie.current()) { 726 errln("trie.current() != current()+getValue()+current() after end of %s", 727 data[i].s); 728 } 729 // Compare the final current() with whether next() can actually continue. 730 trie.saveState(state); 731 UBool nextContinues=FALSE; 732 // Try all graphic characters; we only use those in test strings in this file. 733 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 734 const int32_t minChar=0x20; 735 const int32_t maxChar=0x7e; 736 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 737 const int32_t minChar=0x40; 738 const int32_t maxChar=0xfe; 739 #else 740 const int32_t minChar=0; 741 const int32_t maxChar=0xff; 742 #endif 743 for(int32_t c=minChar; c<=maxChar; ++c) { 744 if(trie.resetToState(state).next(c)) { 745 nextContinues=TRUE; 746 break; 747 } 748 } 749 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 750 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 751 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 752 } 753 trie.reset(); 754 } 755 } 756 757 void BytesTrieTest::checkNextWithState(BytesTrie &trie, 758 const StringAndValue data[], int32_t dataLength) { 759 BytesTrie::State noState, state; 760 for(int32_t i=0; i<dataLength; ++i) { 761 if((i&1)==0) { 762 // This should have no effect. 763 trie.resetToState(noState); 764 } 765 const char *expectedString=data[i].s; 766 int32_t stringLength=strlen(expectedString); 767 int32_t partialLength=stringLength/3; 768 for(int32_t j=0; j<partialLength; ++j) { 769 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 770 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 771 return; 772 } 773 } 774 trie.saveState(state); 775 UStringTrieResult resultAtState=trie.current(); 776 UStringTrieResult result; 777 int32_t valueAtState=-99; 778 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 779 valueAtState=trie.getValue(); 780 } 781 result=trie.next(0); // mismatch 782 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 783 errln("trie.next(0) matched after part of %s", data[i].s); 784 } 785 if( resultAtState!=trie.resetToState(state).current() || 786 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 787 ) { 788 errln("trie.next(part of %s) changes current()/getValue() after " 789 "saveState/next(0)/resetToState", 790 data[i].s); 791 } else if(!USTRINGTRIE_HAS_VALUE( 792 result=trie.next(expectedString+partialLength, 793 stringLength-partialLength)) || 794 result!=trie.current()) { 795 errln("trie.next(rest of %s) does not seem to contain %s after " 796 "saveState/next(0)/resetToState", 797 data[i].s, data[i].s); 798 } else if(!USTRINGTRIE_HAS_VALUE( 799 result=trie.resetToState(state). 800 next(expectedString+partialLength, 801 stringLength-partialLength)) || 802 result!=trie.current()) { 803 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 804 data[i].s); 805 } else if(trie.getValue()!=data[i].value) { 806 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 807 data[i].s, 808 (long)trie.getValue(), (long)trie.getValue(), 809 (long)data[i].value, (long)data[i].value); 810 } 811 trie.reset(); 812 } 813 } 814 815 // next(string) is also tested in other functions, 816 // but here we try to go partway through the string, and then beyond it. 817 void BytesTrieTest::checkNextString(BytesTrie &trie, 818 const StringAndValue data[], int32_t dataLength) { 819 for(int32_t i=0; i<dataLength; ++i) { 820 const char *expectedString=data[i].s; 821 int32_t stringLength=strlen(expectedString); 822 if(!trie.next(expectedString, stringLength/2)) { 823 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 824 continue; 825 } 826 // Test that we stop properly at the end of the string. 827 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) { 828 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 829 } 830 trie.reset(); 831 } 832 } 833 834 void BytesTrieTest::checkIterator(const BytesTrie &trie, 835 const StringAndValue data[], int32_t dataLength) { 836 IcuTestErrorCode errorCode(*this, "checkIterator()"); 837 BytesTrie::Iterator iter(trie, 0, errorCode); 838 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 839 return; 840 } 841 checkIterator(iter, data, dataLength); 842 } 843 844 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter, 845 const StringAndValue data[], int32_t dataLength) { 846 IcuTestErrorCode errorCode(*this, "checkIterator()"); 847 for(int32_t i=0; i<dataLength; ++i) { 848 if(!iter.hasNext()) { 849 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 850 break; 851 } 852 UBool hasNext=iter.next(errorCode); 853 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 854 break; 855 } 856 if(!hasNext) { 857 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 858 break; 859 } 860 if(iter.getString()!=StringPiece(data[i].s)) { 861 errln("trie iterator next().getString()=%s but expected %s for item %d", 862 iter.getString().data(), data[i].s, (int)i); 863 } 864 if(iter.getValue()!=data[i].value) { 865 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 866 (long)iter.getValue(), (long)iter.getValue(), 867 (long)data[i].value, (long)data[i].value, 868 (int)i, data[i].s); 869 } 870 } 871 if(iter.hasNext()) { 872 errln("trie iterator hasNext()=TRUE after all items"); 873 } 874 UBool hasNext=iter.next(errorCode); 875 errorCode.logIfFailureAndReset("trie iterator next() after all items"); 876 if(hasNext) { 877 errln("trie iterator next()=TRUE after all items"); 878 } 879 } 880