1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9 #include "unicode/utypes.h" 10 11 #if !UCONFIG_NO_COLLATION 12 13 #include "unicode/coll.h" 14 #include "unicode/tblcoll.h" 15 #include "unicode/unistr.h" 16 #include "unicode/sortkey.h" 17 #include "itercoll.h" 18 #include "unicode/schriter.h" 19 #include "unicode/chariter.h" 20 #include "unicode/uchar.h" 21 #include "cmemory.h" 22 23 static UErrorCode status = U_ZERO_ERROR; 24 25 CollationIteratorTest::CollationIteratorTest() 26 : test1("What subset of all possible test cases?", ""), 27 test2("has the highest probability of detecting", "") 28 { 29 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); 30 if(U_FAILURE(status)) { 31 delete en_us; 32 en_us = 0; 33 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 34 return; 35 } 36 37 } 38 39 CollationIteratorTest::~CollationIteratorTest() 40 { 41 delete en_us; 42 } 43 44 /** 45 * Test for CollationElementIterator previous and next for the whole set of 46 * unicode characters. 47 */ 48 void CollationIteratorTest::TestUnicodeChar() 49 { 50 CollationElementIterator *iter; 51 UChar codepoint; 52 UnicodeString source; 53 54 for (codepoint = 1; codepoint < 0xFFFE;) 55 { 56 source.remove(); 57 58 while (codepoint % 0xFF != 0) 59 { 60 if (u_isdefined(codepoint)) 61 source += codepoint; 62 codepoint ++; 63 } 64 65 if (u_isdefined(codepoint)) 66 source += codepoint; 67 68 if (codepoint != 0xFFFF) 69 codepoint ++; 70 71 iter = en_us->createCollationElementIterator(source); 72 /* A basic test to see if it's working at all */ 73 backAndForth(*iter); 74 delete iter; 75 } 76 } 77 78 /** 79 * Test for CollationElementIterator.previous() 80 * 81 * @bug 4108758 - Make sure it works with contracting characters 82 * 83 */ 84 void CollationIteratorTest::TestPrevious(/* char* par */) 85 { 86 UErrorCode status = U_ZERO_ERROR; 87 CollationElementIterator *iter = en_us->createCollationElementIterator(test1); 88 89 // A basic test to see if it's working at all 90 backAndForth(*iter); 91 delete iter; 92 93 // Test with a contracting character sequence 94 UnicodeString source; 95 RuleBasedCollator *c1 = NULL; 96 c1 = new RuleBasedCollator( 97 (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status); 98 99 if (c1 == NULL || U_FAILURE(status)) 100 { 101 errln("Couldn't create a RuleBasedCollator with a contracting sequence."); 102 delete c1; 103 return; 104 } 105 106 source = "abchdcba"; 107 iter = c1->createCollationElementIterator(source); 108 backAndForth(*iter); 109 delete iter; 110 delete c1; 111 112 // Test with an expanding character sequence 113 RuleBasedCollator *c2 = NULL; 114 c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status); 115 116 if (c2 == NULL || U_FAILURE(status)) 117 { 118 errln("Couldn't create a RuleBasedCollator with an expanding sequence."); 119 delete c2; 120 return; 121 } 122 123 source = "abcd"; 124 iter = c2->createCollationElementIterator(source); 125 backAndForth(*iter); 126 delete iter; 127 delete c2; 128 129 // Now try both 130 RuleBasedCollator *c3 = NULL; 131 c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status); 132 133 if (c3 == NULL || U_FAILURE(status)) 134 { 135 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence."); 136 delete c3; 137 return; 138 } 139 140 source = "abcdbchdc"; 141 iter = c3->createCollationElementIterator(source); 142 backAndForth(*iter); 143 delete iter; 144 delete c3; 145 146 status=U_ZERO_ERROR; 147 source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc"); 148 149 Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status); 150 if(U_FAILURE(status)){ 151 errln("Couldn't create a collator"); 152 } 153 iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source); 154 backAndForth(*iter); 155 delete iter; 156 delete c4; 157 158 source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC"); 159 Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status); 160 161 iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source); 162 if(U_FAILURE(status)){ 163 errln("Couldn't create Japanese collator\n"); 164 } 165 backAndForth(*iter); 166 delete iter; 167 delete c5; 168 } 169 170 /** 171 * Test for getOffset() and setOffset() 172 */ 173 void CollationIteratorTest::TestOffset(/* char* par */) 174 { 175 CollationElementIterator *iter = en_us->createCollationElementIterator(test1); 176 UErrorCode status = U_ZERO_ERROR; 177 // testing boundaries 178 iter->setOffset(0, status); 179 if (U_FAILURE(status) || iter->previous(status) != CollationElementIterator::NULLORDER) { 180 errln("Error: After setting offset to 0, we should be at the end " 181 "of the backwards iteration"); 182 } 183 iter->setOffset(test1.length(), status); 184 if (U_FAILURE(status) || iter->next(status) != CollationElementIterator::NULLORDER) { 185 errln("Error: After setting offset to end of the string, we should " 186 "be at the end of the backwards iteration"); 187 } 188 189 // Run all the way through the iterator, then get the offset 190 int32_t orderLength = 0; 191 Order *orders = getOrders(*iter, orderLength); 192 193 int32_t offset = iter->getOffset(); 194 195 if (offset != test1.length()) 196 { 197 UnicodeString msg1("offset at end != length: "); 198 UnicodeString msg2(" vs "); 199 200 errln(msg1 + offset + msg2 + test1.length()); 201 } 202 203 // Now set the offset back to the beginning and see if it works 204 CollationElementIterator *pristine = en_us->createCollationElementIterator(test1); 205 206 iter->setOffset(0, status); 207 208 if (U_FAILURE(status)) 209 { 210 errln("setOffset failed."); 211 } 212 else 213 { 214 assertEqual(*iter, *pristine); 215 } 216 217 delete pristine; 218 delete[] orders; 219 delete iter; 220 221 // setting offset in the middle of a contraction 222 UnicodeString contraction = "change"; 223 status = U_ZERO_ERROR; 224 RuleBasedCollator tailored("& a < ch", status); 225 if (U_FAILURE(status)) { 226 errln("Error: in creation of Spanish collator - %s", u_errorName(status)); 227 return; 228 } 229 iter = tailored.createCollationElementIterator(contraction); 230 Order *order = getOrders(*iter, orderLength); 231 iter->setOffset(1, status); // sets offset in the middle of ch 232 int32_t order2Length = 0; 233 Order *order2 = getOrders(*iter, order2Length); 234 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { 235 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction"); 236 } 237 delete[] order; 238 delete[] order2; 239 delete iter; 240 contraction = "peache"; 241 iter = tailored.createCollationElementIterator(contraction); 242 iter->setOffset(3, status); 243 order = getOrders(*iter, orderLength); 244 iter->setOffset(4, status); // sets offset in the middle of ch 245 order2 = getOrders(*iter, order2Length); 246 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { 247 errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction"); 248 } 249 delete[] order; 250 delete[] order2; 251 delete iter; 252 // setting offset in the middle of a surrogate pair 253 UnicodeString surrogate = UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape(); 254 iter = tailored.createCollationElementIterator(surrogate); 255 order = getOrders(*iter, orderLength); 256 iter->setOffset(1, status); // sets offset in the middle of surrogate 257 order2 = getOrders(*iter, order2Length); 258 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { 259 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair"); 260 } 261 delete[] order; 262 delete[] order2; 263 delete iter; 264 surrogate = UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape(); 265 iter = tailored.createCollationElementIterator(surrogate); 266 iter->setOffset(6, status); 267 order = getOrders(*iter, orderLength); 268 iter->setOffset(7, status); // sets offset in the middle of surrogate 269 order2 = getOrders(*iter, order2Length); 270 if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { 271 errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair"); 272 } 273 delete[] order; 274 delete[] order2; 275 delete iter; 276 // TODO: try iterating halfway through a messy string. 277 } 278 279 /** 280 * Test for setText() 281 */ 282 void CollationIteratorTest::TestSetText(/* char* par */) 283 { 284 CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1); 285 CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2); 286 UErrorCode status = U_ZERO_ERROR; 287 288 // Run through the second iterator just to exercise it 289 int32_t c = iter2->next(status); 290 int32_t i = 0; 291 292 while ( ++i < 10 && c != CollationElementIterator::NULLORDER) 293 { 294 if (U_FAILURE(status)) 295 { 296 errln("iter2->next() returned an error."); 297 delete iter2; 298 delete iter1; 299 } 300 301 c = iter2->next(status); 302 } 303 304 // Now set it to point to the same string as the first iterator 305 iter2->setText(test1, status); 306 307 if (U_FAILURE(status)) 308 { 309 errln("call to iter2->setText(test1) failed."); 310 } 311 else 312 { 313 assertEqual(*iter1, *iter2); 314 } 315 iter1->reset(); 316 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text 317 CharacterIterator* chariter = new StringCharacterIterator(test1); 318 iter2->setText(*chariter, status); 319 if (U_FAILURE(status)) 320 { 321 errln("call to iter2->setText(chariter(test1)) failed."); 322 } 323 else 324 { 325 assertEqual(*iter1, *iter2); 326 } 327 328 // test for an empty string 329 UnicodeString empty(""); 330 iter1->setText(empty, status); 331 if (U_FAILURE(status) 332 || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) { 333 errln("Empty string should have no CEs."); 334 } 335 ((StringCharacterIterator *)chariter)->setText(empty); 336 iter1->setText(*chariter, status); 337 if (U_FAILURE(status) 338 || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) { 339 errln("Empty string should have no CEs."); 340 } 341 delete chariter; 342 delete iter2; 343 delete iter1; 344 } 345 346 /** @bug 4108762 347 * Test for getMaxExpansion() 348 */ 349 void CollationIteratorTest::TestMaxExpansion(/* char* par */) 350 { 351 UErrorCode status = U_ZERO_ERROR; 352 UnicodeString rule("&a < ab < c/aba < d < z < ch"); 353 RuleBasedCollator *coll = new RuleBasedCollator(rule, status); 354 UChar ch = 0; 355 UnicodeString str(ch); 356 357 CollationElementIterator *iter = coll->createCollationElementIterator(str); 358 359 while (ch < 0xFFFF && U_SUCCESS(status)) { 360 int count = 1; 361 uint32_t order; 362 ch ++; 363 UnicodeString str(ch); 364 iter->setText(str, status); 365 order = iter->previous(status); 366 367 /* thai management */ 368 if (CollationElementIterator::isIgnorable(order)) 369 order = iter->previous(status); 370 371 while (U_SUCCESS(status) 372 && iter->previous(status) != (int32_t)CollationElementIterator::NULLORDER) 373 { 374 count ++; 375 } 376 377 if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) { 378 errln("Failure at codepoint %d, maximum expansion count < %d\n", 379 ch, count); 380 } 381 } 382 383 delete iter; 384 delete coll; 385 } 386 387 /* 388 * @bug 4157299 389 */ 390 void CollationIteratorTest::TestClearBuffers(/* char* par */) 391 { 392 UErrorCode status = U_ZERO_ERROR; 393 RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status); 394 395 if (c == NULL || U_FAILURE(status)) 396 { 397 errln("Couldn't create a RuleBasedCollator."); 398 delete c; 399 return; 400 } 401 402 UnicodeString source("abcd"); 403 CollationElementIterator *i = c->createCollationElementIterator(source); 404 int32_t e0 = i->next(status); // save the first collation element 405 406 if (U_FAILURE(status)) 407 { 408 errln("call to i->next() failed. err=%s", u_errorName(status)); 409 } 410 else 411 { 412 i->setOffset(3, status); // go to the expanding character 413 414 if (U_FAILURE(status)) 415 { 416 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status)); 417 } 418 else 419 { 420 i->next(status); // but only use up half of it 421 422 if (U_FAILURE(status)) 423 { 424 errln("call to i->next() failed. err=%s", u_errorName(status)); 425 } 426 else 427 { 428 i->setOffset(0, status); // go back to the beginning 429 430 if (U_FAILURE(status)) 431 { 432 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status)); 433 } 434 else 435 { 436 int32_t e = i->next(status); // and get this one again 437 438 if (U_FAILURE(status)) 439 { 440 errln("call to i->next() failed. err=%s", u_errorName(status)); 441 } 442 else if (e != e0) 443 { 444 errln("got 0x%X, expected 0x%X", e, e0); 445 } 446 } 447 } 448 } 449 } 450 451 delete i; 452 delete c; 453 } 454 455 /** 456 * Testing the assignment operator 457 */ 458 void CollationIteratorTest::TestAssignment() 459 { 460 UErrorCode status = U_ZERO_ERROR; 461 RuleBasedCollator *coll = 462 (RuleBasedCollator *)Collator::createInstance(status); 463 464 if (coll == NULL || U_FAILURE(status)) 465 { 466 errln("Couldn't create a default collator."); 467 return; 468 } 469 470 UnicodeString source("abcd"); 471 CollationElementIterator *iter1 = 472 coll->createCollationElementIterator(source); 473 474 CollationElementIterator iter2 = *iter1; 475 476 if (*iter1 != iter2) { 477 errln("Fail collation iterator assignment does not produce the same elements"); 478 } 479 480 CollationElementIterator iter3(*iter1); 481 482 if (*iter1 != iter3) { 483 errln("Fail collation iterator copy constructor does not produce the same elements"); 484 } 485 486 source = CharsToUnicodeString("a\\u0300\\u0325"); 487 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 488 CollationElementIterator *iter4 489 = coll->createCollationElementIterator(source); 490 CollationElementIterator iter5(*iter4); 491 if (*iter4 != iter5) { 492 errln("collation iterator assignment does not produce the same elements"); 493 } 494 iter4->next(status); 495 if (U_FAILURE(status) || *iter4 == iter5) { 496 errln("collation iterator not equal"); 497 } 498 iter5.next(status); 499 if (U_FAILURE(status) || *iter4 != iter5) { 500 errln("collation iterator equal"); 501 } 502 iter4->next(status); 503 if (U_FAILURE(status) || *iter4 == iter5) { 504 errln("collation iterator not equal"); 505 } 506 iter5.next(status); 507 if (U_FAILURE(status) || *iter4 != iter5) { 508 errln("collation iterator equal"); 509 } 510 CollationElementIterator iter6(*iter4); 511 if (*iter4 != iter6) { 512 errln("collation iterator equal"); 513 } 514 iter4->next(status); 515 if (U_FAILURE(status) || *iter4 == iter5) { 516 errln("collation iterator not equal"); 517 } 518 iter5.next(status); 519 if (U_FAILURE(status) || *iter4 != iter5) { 520 errln("collation iterator equal"); 521 } 522 iter4->next(status); 523 if (U_FAILURE(status) || *iter4 == iter5) { 524 errln("collation iterator not equal"); 525 } 526 iter5.next(status); 527 if (U_FAILURE(status) || *iter4 != iter5) { 528 errln("collation iterator equal"); 529 } 530 delete iter1; 531 delete iter4; 532 delete coll; 533 } 534 535 /** 536 * Testing the constructors 537 */ 538 void CollationIteratorTest::TestConstructors() 539 { 540 UErrorCode status = U_ZERO_ERROR; 541 RuleBasedCollator *coll = 542 (RuleBasedCollator *)Collator::createInstance(status); 543 if (coll == NULL || U_FAILURE(status)) 544 { 545 errln("Couldn't create a default collator."); 546 return; 547 } 548 549 // testing protected constructor with character iterator as argument 550 StringCharacterIterator chariter(test1); 551 CollationElementIterator *iter1 = 552 coll->createCollationElementIterator(chariter); 553 if (U_FAILURE(status)) { 554 errln("Couldn't create collation element iterator with character iterator."); 555 return; 556 } 557 CollationElementIterator *iter2 = 558 coll->createCollationElementIterator(test1); 559 560 // initially the 2 collation element iterators should be the same 561 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 562 || *iter2 != *iter1) { 563 errln("CollationElementIterators constructed with the same string data should be the same at the start"); 564 } 565 assertEqual(*iter1, *iter2); 566 567 delete iter1; 568 delete iter2; 569 570 // tests empty strings 571 UnicodeString empty(""); 572 iter1 = coll->createCollationElementIterator(empty); 573 chariter.setText(empty); 574 iter2 = coll->createCollationElementIterator(chariter); 575 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 576 || *iter2 != *iter1) { 577 errln("CollationElementIterators constructed with the same string data should be the same at the start"); 578 } 579 if (iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) { 580 errln("Empty string should have no CEs."); 581 } 582 if (iter2->next(status) != (int32_t)CollationElementIterator::NULLORDER) { 583 errln("Empty string should have no CEs."); 584 } 585 delete iter1; 586 delete iter2; 587 delete coll; 588 } 589 590 /** 591 * Testing the strength order 592 */ 593 void CollationIteratorTest::TestStrengthOrder() 594 { 595 int order = 0x0123ABCD; 596 597 UErrorCode status = U_ZERO_ERROR; 598 RuleBasedCollator *coll = 599 (RuleBasedCollator *)Collator::createInstance(status); 600 if (coll == NULL || U_FAILURE(status)) 601 { 602 errln("Couldn't create a default collator."); 603 return; 604 } 605 606 coll->setStrength(Collator::PRIMARY); 607 CollationElementIterator *iter = 608 coll->createCollationElementIterator(test1); 609 610 if (iter == NULL) { 611 errln("Couldn't create a collation element iterator from default collator"); 612 return; 613 } 614 615 if (iter->strengthOrder(order) != 0x01230000) { 616 errln("Strength order for a primary strength collator should be the first 2 bytes"); 617 return; 618 } 619 620 coll->setStrength(Collator::SECONDARY); 621 if (iter->strengthOrder(order) != 0x0123AB00) { 622 errln("Strength order for a secondary strength collator should be the third byte"); 623 return; 624 } 625 626 coll->setStrength(Collator::TERTIARY); 627 if (iter->strengthOrder(order) != order) { 628 errln("Strength order for a tertiary strength collator should be the third byte"); 629 return; 630 } 631 delete iter; 632 delete coll; 633 } 634 635 /** 636 * Return a string containing all of the collation orders 637 * returned by calls to next on the specified iterator 638 */ 639 UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target) 640 { 641 int32_t order; 642 UErrorCode status = U_ZERO_ERROR; 643 644 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) 645 { 646 target += "0x"; 647 appendHex(order, 8, target); 648 target += " "; 649 } 650 651 return target; 652 } 653 654 void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) 655 { 656 int32_t c1, c2, count = 0; 657 UErrorCode status = U_ZERO_ERROR; 658 659 do 660 { 661 c1 = i1.next(status); 662 c2 = i2.next(status); 663 664 if (c1 != c2) 665 { 666 errln(" %d: strength(0x%X) != strength(0x%X)", count, c1, c2); 667 break; 668 } 669 670 count += 1; 671 } 672 while (c1 != CollationElementIterator::NULLORDER); 673 } 674 675 void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) 676 { 677 if (exec) 678 { 679 logln("Collation Iteration Tests: "); 680 } 681 682 if(en_us) { 683 switch (index) 684 { 685 case 0: name = "TestPrevious"; if (exec) TestPrevious(/* par */); break; 686 case 1: name = "TestOffset"; if (exec) TestOffset(/* par */); break; 687 case 2: name = "TestSetText"; if (exec) TestSetText(/* par */); break; 688 case 3: name = "TestMaxExpansion"; if (exec) TestMaxExpansion(/* par */); break; 689 case 4: name = "TestClearBuffers"; if (exec) TestClearBuffers(/* par */); break; 690 case 5: name = "TestUnicodeChar"; if (exec) TestUnicodeChar(/* par */); break; 691 case 6: name = "TestAssignment"; if (exec) TestAssignment(/* par */); break; 692 case 7: name = "TestConstructors"; if (exec) TestConstructors(/* par */); break; 693 case 8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break; 694 default: name = ""; break; 695 } 696 } else { 697 dataerrln("Class iterator not instantiated"); 698 name = ""; 699 } 700 } 701 702 #endif /* #if !UCONFIG_NO_COLLATION */ 703