1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: bytetrietest.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov16 12 * created by: Markus W. Scherer 13 */ 14 15 #include <string.h> 16 17 #include "unicode/utypes.h" 18 #include "unicode/bytestrie.h" 19 #include "unicode/bytestriebuilder.h" 20 #include "unicode/localpointer.h" 21 #include "unicode/stringpiece.h" 22 #include "intltest.h" 23 24 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 25 26 struct StringAndValue { 27 const char *s; 28 int32_t value; 29 }; 30 31 class BytesTrieTest : public IntlTest { 32 public: 33 BytesTrieTest(); 34 virtual ~BytesTrieTest(); 35 36 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 37 void TestBuilder(); 38 void TestEmpty(); 39 void Test_a(); 40 void Test_a_ab(); 41 void TestShortestBranch(); 42 void TestBranches(); 43 void TestLongSequence(); 44 void TestLongBranch(); 45 void TestValuesForState(); 46 void TestCompact(); 47 48 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 49 void TestHasUniqueValue(); 50 void TestGetNextBytes(); 51 void TestIteratorFromBranch(); 52 void TestIteratorFromLinearMatch(); 53 void TestTruncatingIteratorFromRoot(); 54 void TestTruncatingIteratorFromLinearMatchShort(); 55 void TestTruncatingIteratorFromLinearMatchLong(); 56 void TestIteratorFromBytes(); 57 58 void checkData(const StringAndValue data[], int32_t dataLength); 59 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 60 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 61 UStringTrieBuildOption buildOption); 62 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 63 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 64 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 65 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 66 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 67 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 68 69 private: 70 BytesTrieBuilder *builder_; 71 }; 72 73 extern IntlTest *createBytesTrieTest() { 74 return new BytesTrieTest(); 75 } 76 77 BytesTrieTest::BytesTrieTest() : builder_(NULL) { 78 IcuTestErrorCode errorCode(*this, "BytesTrieTest()"); 79 builder_=new BytesTrieBuilder(errorCode); 80 } 81 82 BytesTrieTest::~BytesTrieTest() { 83 delete builder_; 84 } 85 86 void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 87 if(exec) { 88 logln("TestSuite BytesTrieTest: "); 89 } 90 TESTCASE_AUTO_BEGIN; 91 TESTCASE_AUTO(TestBuilder); 92 TESTCASE_AUTO(TestEmpty); 93 TESTCASE_AUTO(Test_a); 94 TESTCASE_AUTO(Test_a_ab); 95 TESTCASE_AUTO(TestShortestBranch); 96 TESTCASE_AUTO(TestBranches); 97 TESTCASE_AUTO(TestLongSequence); 98 TESTCASE_AUTO(TestLongBranch); 99 TESTCASE_AUTO(TestValuesForState); 100 TESTCASE_AUTO(TestCompact); 101 TESTCASE_AUTO(TestHasUniqueValue); 102 TESTCASE_AUTO(TestGetNextBytes); 103 TESTCASE_AUTO(TestIteratorFromBranch); 104 TESTCASE_AUTO(TestIteratorFromLinearMatch); 105 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 106 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 107 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 108 TESTCASE_AUTO(TestIteratorFromBytes); 109 TESTCASE_AUTO_END; 110 } 111 112 void BytesTrieTest::TestBuilder() { 113 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 114 builder_->clear(); 115 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 116 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 117 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 118 return; 119 } 120 // TODO: remove .build(...) once add() checks for duplicates. 121 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 122 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 123 errln("BytesTrieBuilder.add() did not detect duplicates"); 124 return; 125 } 126 } 127 128 void BytesTrieTest::TestEmpty() { 129 static const StringAndValue data[]={ 130 { "", 0 } 131 }; 132 checkData(data, LENGTHOF(data)); 133 } 134 135 void BytesTrieTest::Test_a() { 136 static const StringAndValue data[]={ 137 { "a", 1 } 138 }; 139 checkData(data, LENGTHOF(data)); 140 } 141 142 void BytesTrieTest::Test_a_ab() { 143 static const StringAndValue data[]={ 144 { "a", 1 }, 145 { "ab", 100 } 146 }; 147 checkData(data, LENGTHOF(data)); 148 } 149 150 void BytesTrieTest::TestShortestBranch() { 151 static const StringAndValue data[]={ 152 { "a", 1000 }, 153 { "b", 2000 } 154 }; 155 checkData(data, LENGTHOF(data)); 156 } 157 158 void BytesTrieTest::TestBranches() { 159 static const StringAndValue data[]={ 160 { "a", 0x10 }, 161 { "cc", 0x40 }, 162 { "e", 0x100 }, 163 { "ggg", 0x400 }, 164 { "i", 0x1000 }, 165 { "kkkk", 0x4000 }, 166 { "n", 0x10000 }, 167 { "ppppp", 0x40000 }, 168 { "r", 0x100000 }, 169 { "sss", 0x200000 }, 170 { "t", 0x400000 }, 171 { "uu", 0x800000 }, 172 { "vv", 0x7fffffff }, 173 { "zz", (int32_t)0x80000000 } 174 }; 175 for(int32_t length=2; length<=LENGTHOF(data); ++length) { 176 infoln("TestBranches length=%d", (int)length); 177 checkData(data, length); 178 } 179 } 180 181 void BytesTrieTest::TestLongSequence() { 182 static const StringAndValue data[]={ 183 { "a", -1 }, 184 // sequence of linear-match nodes 185 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 186 // more than 256 bytes 187 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 188 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 189 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 190 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 191 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 192 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 193 }; 194 checkData(data, LENGTHOF(data)); 195 } 196 197 void BytesTrieTest::TestLongBranch() { 198 // Split-branch and interesting compact-integer values. 199 static const StringAndValue data[]={ 200 { "a", -2 }, 201 { "b", -1 }, 202 { "c", 0 }, 203 { "d2", 1 }, 204 { "f", 0x3f }, 205 { "g", 0x40 }, 206 { "h", 0x41 }, 207 { "j23", 0x1900 }, 208 { "j24", 0x19ff }, 209 { "j25", 0x1a00 }, 210 { "k2", 0x1a80 }, 211 { "k3", 0x1aff }, 212 { "l234567890", 0x1b00 }, 213 { "l234567890123", 0x1b01 }, 214 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 215 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 216 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 217 { "r", 0x333333 }, 218 { "s2345", 0x4444444 }, 219 { "t234567890", 0x77777777 }, 220 { "z", (int32_t)0x80000001 } 221 }; 222 checkData(data, LENGTHOF(data)); 223 } 224 225 void BytesTrieTest::TestValuesForState() { 226 // Check that saveState() and resetToState() interact properly 227 // with next() and current(). 228 static const StringAndValue data[]={ 229 { "a", -1 }, 230 { "ab", -2 }, 231 { "abc", -3 }, 232 { "abcd", -4 }, 233 { "abcde", -5 }, 234 { "abcdef", -6 } 235 }; 236 checkData(data, LENGTHOF(data)); 237 } 238 239 void BytesTrieTest::TestCompact() { 240 // Duplicate trailing strings and values provide opportunities for compacting. 241 static const StringAndValue data[]={ 242 { "+", 0 }, 243 { "+august", 8 }, 244 { "+december", 12 }, 245 { "+july", 7 }, 246 { "+june", 6 }, 247 { "+november", 11 }, 248 { "+october", 10 }, 249 { "+september", 9 }, 250 { "-", 0 }, 251 { "-august", 8 }, 252 { "-december", 12 }, 253 { "-july", 7 }, 254 { "-june", 6 }, 255 { "-november", 11 }, 256 { "-october", 10 }, 257 { "-september", 9 }, 258 // The l+n branch (with its sub-nodes) is a duplicate but will be written 259 // both times because each time it follows a different linear-match node. 260 { "xjuly", 7 }, 261 { "xjune", 6 } 262 }; 263 checkData(data, LENGTHOF(data)); 264 } 265 266 BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 267 // All types of nodes leading to the same value, 268 // for code coverage of recursive functions. 269 // In particular, we need a lot of branches on some single level 270 // to exercise a split-branch node. 271 static const StringAndValue data[]={ 272 { "august", 8 }, 273 { "jan", 1 }, 274 { "jan.", 1 }, 275 { "jana", 1 }, 276 { "janbb", 1 }, 277 { "janc", 1 }, 278 { "janddd", 1 }, 279 { "janee", 1 }, 280 { "janef", 1 }, 281 { "janf", 1 }, 282 { "jangg", 1 }, 283 { "janh", 1 }, 284 { "janiiii", 1 }, 285 { "janj", 1 }, 286 { "jankk", 1 }, 287 { "jankl", 1 }, 288 { "jankmm", 1 }, 289 { "janl", 1 }, 290 { "janm", 1 }, 291 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 292 { "jano", 1 }, 293 { "janpp", 1 }, 294 { "janqqq", 1 }, 295 { "janr", 1 }, 296 { "januar", 1 }, 297 { "january", 1 }, 298 { "july", 7 }, 299 { "jun", 6 }, 300 { "jun.", 6 }, 301 { "june", 6 } 302 }; 303 return buildTrie(data, LENGTHOF(data), buildOption); 304 } 305 306 void BytesTrieTest::TestHasUniqueValue() { 307 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 308 if(trie.isNull()) { 309 return; // buildTrie() reported an error 310 } 311 int32_t uniqueValue; 312 if(trie->hasUniqueValue(uniqueValue)) { 313 errln("unique value at root"); 314 } 315 trie->next('j'); 316 trie->next('a'); 317 trie->next('n'); 318 // hasUniqueValue() directly after next() 319 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 320 errln("not unique value 1 after \"jan\""); 321 } 322 trie->first('j'); 323 trie->next('u'); 324 if(trie->hasUniqueValue(uniqueValue)) { 325 errln("unique value after \"ju\""); 326 } 327 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 328 errln("not normal value 6 after \"jun\""); 329 } 330 // hasUniqueValue() after getValue() 331 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 332 errln("not unique value 6 after \"jun\""); 333 } 334 // hasUniqueValue() from within a linear-match node 335 trie->first('a'); 336 trie->next('u'); 337 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 338 errln("not unique value 8 after \"au\""); 339 } 340 } 341 342 void BytesTrieTest::TestGetNextBytes() { 343 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 344 if(trie.isNull()) { 345 return; // buildTrie() reported an error 346 } 347 char buffer[40]; 348 CheckedArrayByteSink sink(buffer, LENGTHOF(buffer)); 349 int32_t count=trie->getNextBytes(sink); 350 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') { 351 errln("months getNextBytes()!=[aj] at root"); 352 } 353 trie->next('j'); 354 trie->next('a'); 355 trie->next('n'); 356 // getNextBytes() directly after next() 357 count=trie->getNextBytes(sink.Reset()); 358 buffer[count]=0; 359 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { 360 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); 361 } 362 // getNextBytes() after getValue() 363 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 364 memset(buffer, 0, sizeof(buffer)); 365 count=trie->getNextBytes(sink.Reset()); 366 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { 367 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 368 } 369 // getNextBytes() from a linear-match node 370 trie->next('u'); 371 memset(buffer, 0, sizeof(buffer)); 372 count=trie->getNextBytes(sink.Reset()); 373 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') { 374 errln("months getNextBytes()!=[a] after \"janu\""); 375 } 376 trie->next('a'); 377 memset(buffer, 0, sizeof(buffer)); 378 count=trie->getNextBytes(sink.Reset()); 379 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') { 380 errln("months getNextBytes()!=[r] after \"janua\""); 381 } 382 trie->next('r'); 383 trie->next('y'); 384 // getNextBytes() after a final match 385 count=trie->getNextBytes(sink.Reset()); 386 if(count!=0 || sink.NumberOfBytesAppended()!=0) { 387 errln("months getNextBytes()!=[] after \"january\""); 388 } 389 } 390 391 void BytesTrieTest::TestIteratorFromBranch() { 392 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 393 if(trie.isNull()) { 394 return; // buildTrie() reported an error 395 } 396 // Go to a branch node. 397 trie->next('j'); 398 trie->next('a'); 399 trie->next('n'); 400 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 401 BytesTrie::Iterator iter(*trie, 0, errorCode); 402 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 403 return; 404 } 405 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 406 // following "jan". 407 static const StringAndValue data[]={ 408 { "", 1 }, 409 { ".", 1 }, 410 { "a", 1 }, 411 { "bb", 1 }, 412 { "c", 1 }, 413 { "ddd", 1 }, 414 { "ee", 1 }, 415 { "ef", 1 }, 416 { "f", 1 }, 417 { "gg", 1 }, 418 { "h", 1 }, 419 { "iiii", 1 }, 420 { "j", 1 }, 421 { "kk", 1 }, 422 { "kl", 1 }, 423 { "kmm", 1 }, 424 { "l", 1 }, 425 { "m", 1 }, 426 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 427 { "o", 1 }, 428 { "pp", 1 }, 429 { "qqq", 1 }, 430 { "r", 1 }, 431 { "uar", 1 }, 432 { "uary", 1 } 433 }; 434 checkIterator(iter, data, LENGTHOF(data)); 435 // Reset, and we should get the same result. 436 logln("after iter.reset()"); 437 checkIterator(iter.reset(), data, LENGTHOF(data)); 438 } 439 440 void BytesTrieTest::TestIteratorFromLinearMatch() { 441 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 442 if(trie.isNull()) { 443 return; // buildTrie() reported an error 444 } 445 // Go into a linear-match node. 446 trie->next('j'); 447 trie->next('a'); 448 trie->next('n'); 449 trie->next('u'); 450 trie->next('a'); 451 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 452 BytesTrie::Iterator iter(*trie, 0, errorCode); 453 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 454 return; 455 } 456 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 457 // following "janua". 458 static const StringAndValue data[]={ 459 { "r", 1 }, 460 { "ry", 1 } 461 }; 462 checkIterator(iter, data, LENGTHOF(data)); 463 // Reset, and we should get the same result. 464 logln("after iter.reset()"); 465 checkIterator(iter.reset(), data, LENGTHOF(data)); 466 } 467 468 void BytesTrieTest::TestTruncatingIteratorFromRoot() { 469 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 470 if(trie.isNull()) { 471 return; // buildTrie() reported an error 472 } 473 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 474 BytesTrie::Iterator iter(*trie, 4, errorCode); 475 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 476 return; 477 } 478 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 479 // of each string, and no string duplicates from the truncation. 480 static const StringAndValue data[]={ 481 { "augu", -1 }, 482 { "jan", 1 }, 483 { "jan.", 1 }, 484 { "jana", 1 }, 485 { "janb", -1 }, 486 { "janc", 1 }, 487 { "jand", -1 }, 488 { "jane", -1 }, 489 { "janf", 1 }, 490 { "jang", -1 }, 491 { "janh", 1 }, 492 { "jani", -1 }, 493 { "janj", 1 }, 494 { "jank", -1 }, 495 { "janl", 1 }, 496 { "janm", 1 }, 497 { "jann", -1 }, 498 { "jano", 1 }, 499 { "janp", -1 }, 500 { "janq", -1 }, 501 { "janr", 1 }, 502 { "janu", -1 }, 503 { "july", 7 }, 504 { "jun", 6 }, 505 { "jun.", 6 }, 506 { "june", 6 } 507 }; 508 checkIterator(iter, data, LENGTHOF(data)); 509 // Reset, and we should get the same result. 510 logln("after iter.reset()"); 511 checkIterator(iter.reset(), data, LENGTHOF(data)); 512 } 513 514 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 515 static const StringAndValue data[]={ 516 { "abcdef", 10 }, 517 { "abcdepq", 200 }, 518 { "abcdeyz", 3000 } 519 }; 520 LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 521 if(trie.isNull()) { 522 return; // buildTrie() reported an error 523 } 524 // Go into a linear-match node. 525 trie->next('a'); 526 trie->next('b'); 527 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 528 // Truncate within the linear-match node. 529 BytesTrie::Iterator iter(*trie, 2, errorCode); 530 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 531 return; 532 } 533 static const StringAndValue expected[]={ 534 { "cd", -1 } 535 }; 536 checkIterator(iter, expected, LENGTHOF(expected)); 537 // Reset, and we should get the same result. 538 logln("after iter.reset()"); 539 checkIterator(iter.reset(), expected, LENGTHOF(expected)); 540 } 541 542 void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 543 static const StringAndValue data[]={ 544 { "abcdef", 10 }, 545 { "abcdepq", 200 }, 546 { "abcdeyz", 3000 } 547 }; 548 LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 549 if(trie.isNull()) { 550 return; // buildTrie() reported an error 551 } 552 // Go into a linear-match node. 553 trie->next('a'); 554 trie->next('b'); 555 trie->next('c'); 556 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 557 // Truncate after the linear-match node. 558 BytesTrie::Iterator iter(*trie, 3, errorCode); 559 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 560 return; 561 } 562 static const StringAndValue expected[]={ 563 { "def", 10 }, 564 { "dep", -1 }, 565 { "dey", -1 } 566 }; 567 checkIterator(iter, expected, LENGTHOF(expected)); 568 // Reset, and we should get the same result. 569 logln("after iter.reset()"); 570 checkIterator(iter.reset(), expected, LENGTHOF(expected)); 571 } 572 573 void BytesTrieTest::TestIteratorFromBytes() { 574 static const StringAndValue data[]={ 575 { "mm", 3 }, 576 { "mmm", 33 }, 577 { "mmnop", 333 } 578 }; 579 builder_->clear(); 580 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()"); 581 for(int32_t i=0; i<LENGTHOF(data); ++i) { 582 builder_->add(data[i].s, data[i].value, errorCode); 583 } 584 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode); 585 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode); 586 checkIterator(iter, data, LENGTHOF(data)); 587 } 588 589 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 590 logln("checkData(dataLength=%d, fast)", (int)dataLength); 591 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 592 logln("checkData(dataLength=%d, small)", (int)dataLength); 593 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 594 } 595 596 void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 597 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption)); 598 if(trie.isNull()) { 599 return; // buildTrie() reported an error 600 } 601 checkFirst(*trie, data, dataLength); 602 checkNext(*trie, data, dataLength); 603 checkNextWithState(*trie, data, dataLength); 604 checkNextString(*trie, data, dataLength); 605 checkIterator(*trie, data, dataLength); 606 } 607 608 BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 609 UStringTrieBuildOption buildOption) { 610 IcuTestErrorCode errorCode(*this, "buildTrie()"); 611 // Add the items to the trie builder in an interesting (not trivial, not random) order. 612 int32_t index, step; 613 if(dataLength&1) { 614 // Odd number of items. 615 index=dataLength/2; 616 step=2; 617 } else if((dataLength%3)!=0) { 618 // Not a multiple of 3. 619 index=dataLength/5; 620 step=3; 621 } else { 622 index=dataLength-1; 623 step=-1; 624 } 625 builder_->clear(); 626 for(int32_t i=0; i<dataLength; ++i) { 627 builder_->add(data[index].s, data[index].value, errorCode); 628 index=(index+step)%dataLength; 629 } 630 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode); 631 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode)); 632 if(!errorCode.logIfFailureAndReset("add()/build()")) { 633 builder_->add("zzz", 999, errorCode); 634 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 635 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 636 } 637 } 638 logln("serialized trie size: %ld bytes\n", (long)sp.length()); 639 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode); 640 if(sp.data()==sp2.data()) { 641 errln("builder.buildStringPiece() before & after build() returned same array"); 642 } 643 if(errorCode.isFailure()) { 644 return NULL; 645 } 646 // Tries from either build() method should be identical but 647 // BytesTrie does not implement equals(). 648 // We just return either one. 649 if((dataLength&1)!=0) { 650 return trie.orphan(); 651 } else { 652 return new BytesTrie(sp2.data()); 653 } 654 } 655 656 void BytesTrieTest::checkFirst(BytesTrie &trie, 657 const StringAndValue data[], int32_t dataLength) { 658 for(int32_t i=0; i<dataLength; ++i) { 659 int c=*data[i].s; 660 if(c==0) { 661 continue; // skip empty string 662 } 663 UStringTrieResult firstResult=trie.first(c); 664 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 665 UStringTrieResult nextResult=trie.next(data[i].s[1]); 666 if(firstResult!=trie.reset().next(c) || 667 firstResult!=trie.current() || 668 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 669 nextResult!=trie.next(data[i].s[1]) 670 ) { 671 errln("trie.first(%c)!=trie.reset().next(same) for %s", 672 c, data[i].s); 673 } 674 } 675 trie.reset(); 676 } 677 678 void BytesTrieTest::checkNext(BytesTrie &trie, 679 const StringAndValue data[], int32_t dataLength) { 680 BytesTrie::State state; 681 for(int32_t i=0; i<dataLength; ++i) { 682 int32_t stringLength= (i&1) ? -1 : strlen(data[i].s); 683 UStringTrieResult result; 684 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) || 685 result!=trie.current() 686 ) { 687 errln("trie does not seem to contain %s", data[i].s); 688 } else if(trie.getValue()!=data[i].value) { 689 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 690 data[i].s, 691 (long)trie.getValue(), (long)trie.getValue(), 692 (long)data[i].value, (long)data[i].value); 693 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 694 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 695 } 696 trie.reset(); 697 stringLength=strlen(data[i].s); 698 result=trie.current(); 699 for(int32_t j=0; j<stringLength; ++j) { 700 if(!USTRINGTRIE_HAS_NEXT(result)) { 701 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 702 break; 703 } 704 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 705 trie.getValue(); 706 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 707 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 708 break; 709 } 710 } 711 result=trie.next(data[i].s[j]); 712 if(!USTRINGTRIE_MATCHES(result)) { 713 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 714 break; 715 } 716 if(result!=trie.current()) { 717 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 718 break; 719 } 720 } 721 if(!USTRINGTRIE_HAS_VALUE(result)) { 722 errln("trie.next()!=hasValue at the end of %s", data[i].s); 723 continue; 724 } 725 trie.getValue(); 726 if(result!=trie.current()) { 727 errln("trie.current() != current()+getValue()+current() after end of %s", 728 data[i].s); 729 } 730 // Compare the final current() with whether next() can actually continue. 731 trie.saveState(state); 732 UBool nextContinues=FALSE; 733 // Try all graphic characters; we only use those in test strings in this file. 734 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 735 const int32_t minChar=0x20; 736 const int32_t maxChar=0x7e; 737 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 738 const int32_t minChar=0x40; 739 const int32_t maxChar=0xfe; 740 #else 741 const int32_t minChar=0; 742 const int32_t maxChar=0xff; 743 #endif 744 for(int32_t c=minChar; c<=maxChar; ++c) { 745 if(trie.resetToState(state).next(c)) { 746 nextContinues=TRUE; 747 break; 748 } 749 } 750 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 751 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 752 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 753 } 754 trie.reset(); 755 } 756 } 757 758 void BytesTrieTest::checkNextWithState(BytesTrie &trie, 759 const StringAndValue data[], int32_t dataLength) { 760 BytesTrie::State noState, state; 761 for(int32_t i=0; i<dataLength; ++i) { 762 if((i&1)==0) { 763 // This should have no effect. 764 trie.resetToState(noState); 765 } 766 const char *expectedString=data[i].s; 767 int32_t stringLength=strlen(expectedString); 768 int32_t partialLength=stringLength/3; 769 for(int32_t j=0; j<partialLength; ++j) { 770 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 771 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 772 return; 773 } 774 } 775 trie.saveState(state); 776 UStringTrieResult resultAtState=trie.current(); 777 UStringTrieResult result; 778 int32_t valueAtState=-99; 779 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 780 valueAtState=trie.getValue(); 781 } 782 result=trie.next(0); // mismatch 783 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 784 errln("trie.next(0) matched after part of %s", data[i].s); 785 } 786 if( resultAtState!=trie.resetToState(state).current() || 787 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 788 ) { 789 errln("trie.next(part of %s) changes current()/getValue() after " 790 "saveState/next(0)/resetToState", 791 data[i].s); 792 } else if(!USTRINGTRIE_HAS_VALUE( 793 result=trie.next(expectedString+partialLength, 794 stringLength-partialLength)) || 795 result!=trie.current()) { 796 errln("trie.next(rest of %s) does not seem to contain %s after " 797 "saveState/next(0)/resetToState", 798 data[i].s, data[i].s); 799 } else if(!USTRINGTRIE_HAS_VALUE( 800 result=trie.resetToState(state). 801 next(expectedString+partialLength, 802 stringLength-partialLength)) || 803 result!=trie.current()) { 804 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 805 data[i].s); 806 } else if(trie.getValue()!=data[i].value) { 807 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 808 data[i].s, 809 (long)trie.getValue(), (long)trie.getValue(), 810 (long)data[i].value, (long)data[i].value); 811 } 812 trie.reset(); 813 } 814 } 815 816 // next(string) is also tested in other functions, 817 // but here we try to go partway through the string, and then beyond it. 818 void BytesTrieTest::checkNextString(BytesTrie &trie, 819 const StringAndValue data[], int32_t dataLength) { 820 for(int32_t i=0; i<dataLength; ++i) { 821 const char *expectedString=data[i].s; 822 int32_t stringLength=strlen(expectedString); 823 if(!trie.next(expectedString, stringLength/2)) { 824 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 825 continue; 826 } 827 // Test that we stop properly at the end of the string. 828 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) { 829 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 830 } 831 trie.reset(); 832 } 833 } 834 835 void BytesTrieTest::checkIterator(const BytesTrie &trie, 836 const StringAndValue data[], int32_t dataLength) { 837 IcuTestErrorCode errorCode(*this, "checkIterator()"); 838 BytesTrie::Iterator iter(trie, 0, errorCode); 839 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 840 return; 841 } 842 checkIterator(iter, data, dataLength); 843 } 844 845 void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter, 846 const StringAndValue data[], int32_t dataLength) { 847 IcuTestErrorCode errorCode(*this, "checkIterator()"); 848 for(int32_t i=0; i<dataLength; ++i) { 849 if(!iter.hasNext()) { 850 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 851 break; 852 } 853 UBool hasNext=iter.next(errorCode); 854 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 855 break; 856 } 857 if(!hasNext) { 858 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 859 break; 860 } 861 if(iter.getString()!=StringPiece(data[i].s)) { 862 errln("trie iterator next().getString()=%s but expected %s for item %d", 863 iter.getString().data(), data[i].s, (int)i); 864 } 865 if(iter.getValue()!=data[i].value) { 866 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 867 (long)iter.getValue(), (long)iter.getValue(), 868 (long)data[i].value, (long)data[i].value, 869 (int)i, data[i].s); 870 } 871 } 872 if(iter.hasNext()) { 873 errln("trie iterator hasNext()=TRUE after all items"); 874 } 875 UBool hasNext=iter.next(errorCode); 876 errorCode.logIfFailureAndReset("trie iterator next() after all items"); 877 if(hasNext) { 878 errln("trie iterator next()=TRUE after all items"); 879 } 880 } 881