1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_COLLATION 10 11 #include "unicode/coll.h" 12 #include "unicode/tblcoll.h" 13 #include "unicode/unistr.h" 14 #include "unicode/sortkey.h" 15 #include "itercoll.h" 16 #include "unicode/schriter.h" 17 #include "unicode/chariter.h" 18 #include "unicode/uchar.h" 19 #include "cmemory.h" 20 21 #define ARRAY_LENGTH(array) (sizeof array / sizeof array[0]) 22 23 static UErrorCode status = U_ZERO_ERROR; 24 25 CollationIteratorTest::CollationIteratorTest() 26 : test1("What subset of all possible test cases?", ""), 27 test2("has the highest probability of detecting", "") 28 { 29 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); 30 if(U_FAILURE(status)) { 31 delete en_us; 32 en_us = 0; 33 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 34 return; 35 } 36 37 } 38 39 CollationIteratorTest::~CollationIteratorTest() 40 { 41 delete en_us; 42 } 43 44 /** 45 * Test for CollationElementIterator previous and next for the whole set of 46 * unicode characters. 47 */ 48 void CollationIteratorTest::TestUnicodeChar() 49 { 50 CollationElementIterator *iter; 51 UChar codepoint; 52 UnicodeString source; 53 54 for (codepoint = 1; codepoint < 0xFFFE;) 55 { 56 source.remove(); 57 58 while (codepoint % 0xFF != 0) 59 { 60 if (u_isdefined(codepoint)) 61 source += codepoint; 62 codepoint ++; 63 } 64 65 if (u_isdefined(codepoint)) 66 source += codepoint; 67 68 if (codepoint != 0xFFFF) 69 codepoint ++; 70 71 iter = en_us->createCollationElementIterator(source); 72 /* A basic test to see if it's working at all */ 73 backAndForth(*iter); 74 delete iter; 75 } 76 } 77 78 /** 79 * Test for CollationElementIterator.previous() 80 * 81 * @bug 4108758 - Make sure it works with contracting characters 82 * 83 */ 84 void CollationIteratorTest::TestPrevious(/* char* par */) 85 { 86 UErrorCode status = U_ZERO_ERROR; 87 CollationElementIterator *iter = en_us->createCollationElementIterator(test1); 88 89 // A basic test to see if it's working at all 90 backAndForth(*iter); 91 delete iter; 92 93 // Test with a contracting character sequence 94 UnicodeString source; 95 RuleBasedCollator *c1 = NULL; 96 c1 = new RuleBasedCollator( 97 (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status); 98 99 if (c1 == NULL || U_FAILURE(status)) 100 { 101 errln("Couldn't create a RuleBasedCollator with a contracting sequence."); 102 delete c1; 103 return; 104 } 105 106 source = "abchdcba"; 107 iter = c1->createCollationElementIterator(source); 108 backAndForth(*iter); 109 delete iter; 110 delete c1; 111 112 // Test with an expanding character sequence 113 RuleBasedCollator *c2 = NULL; 114 c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status); 115 116 if (c2 == NULL || U_FAILURE(status)) 117 { 118 errln("Couldn't create a RuleBasedCollator with an expanding sequence."); 119 delete c2; 120 return; 121 } 122 123 source = "abcd"; 124 iter = c2->createCollationElementIterator(source); 125 backAndForth(*iter); 126 delete iter; 127 delete c2; 128 129 // Now try both 130 RuleBasedCollator *c3 = NULL; 131 c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status); 132 133 if (c3 == NULL || U_FAILURE(status)) 134 { 135 errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence."); 136 delete c3; 137 return; 138 } 139 140 source = "abcdbchdc"; 141 iter = c3->createCollationElementIterator(source); 142 backAndForth(*iter); 143 delete iter; 144 delete c3; 145 146 status=U_ZERO_ERROR; 147 source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc"); 148 149 Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status); 150 if(U_FAILURE(status)){ 151 errln("Couldn't create a collator"); 152 } 153 iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source); 154 backAndForth(*iter); 155 delete iter; 156 delete c4; 157 158 source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC"); 159 Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status); 160 161 iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source); 162 if(U_FAILURE(status)){ 163 errln("Couldn't create Japanese collator\n"); 164 } 165 backAndForth(*iter); 166 delete iter; 167 delete c5; 168 } 169 170 /** 171 * Test for getOffset() and setOffset() 172 */ 173 void CollationIteratorTest::TestOffset(/* char* par */) 174 { 175 CollationElementIterator *iter = en_us->createCollationElementIterator(test1); 176 UErrorCode status = U_ZERO_ERROR; 177 // testing boundaries 178 iter->setOffset(0, status); 179 if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) { 180 errln("Error: After setting offset to 0, we should be at the end " 181 "of the backwards iteration"); 182 } 183 iter->setOffset(test1.length(), status); 184 if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) { 185 errln("Error: After setting offset to end of the string, we should " 186 "be at the end of the backwards iteration"); 187 } 188 189 // Run all the way through the iterator, then get the offset 190 int32_t orderLength = 0; 191 Order *orders = getOrders(*iter, orderLength); 192 193 int32_t offset = iter->getOffset(); 194 195 if (offset != test1.length()) 196 { 197 UnicodeString msg1("offset at end != length: "); 198 UnicodeString msg2(" vs "); 199 200 errln(msg1 + offset + msg2 + test1.length()); 201 } 202 203 // Now set the offset back to the beginning and see if it works 204 CollationElementIterator *pristine = en_us->createCollationElementIterator(test1); 205 206 iter->setOffset(0, status); 207 208 if (U_FAILURE(status)) 209 { 210 errln("setOffset failed."); 211 } 212 else 213 { 214 assertEqual(*iter, *pristine); 215 } 216 217 // TODO: try iterating halfway through a messy string. 218 219 delete pristine; 220 delete[] orders; 221 delete iter; 222 } 223 224 /** 225 * Test for setText() 226 */ 227 void CollationIteratorTest::TestSetText(/* char* par */) 228 { 229 CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1); 230 CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2); 231 UErrorCode status = U_ZERO_ERROR; 232 233 // Run through the second iterator just to exercise it 234 int32_t c = iter2->next(status); 235 int32_t i = 0; 236 237 while ( ++i < 10 && c != CollationElementIterator::NULLORDER) 238 { 239 if (U_FAILURE(status)) 240 { 241 errln("iter2->next() returned an error."); 242 delete iter2; 243 delete iter1; 244 } 245 246 c = iter2->next(status); 247 } 248 249 // Now set it to point to the same string as the first iterator 250 iter2->setText(test1, status); 251 252 if (U_FAILURE(status)) 253 { 254 errln("call to iter2->setText(test1) failed."); 255 } 256 else 257 { 258 assertEqual(*iter1, *iter2); 259 } 260 iter1->reset(); 261 //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text 262 CharacterIterator* chariter = new StringCharacterIterator(test1); 263 iter2->setText(*chariter, status); 264 if (U_FAILURE(status)) 265 { 266 errln("call to iter2->setText(chariter(test1)) failed."); 267 } 268 else 269 { 270 assertEqual(*iter1, *iter2); 271 } 272 273 // test for an empty string 274 UnicodeString empty(""); 275 iter1->setText(empty, status); 276 if (U_FAILURE(status) 277 || iter1->next(status) != (int32_t)UCOL_NULLORDER) { 278 errln("Empty string should have no CEs."); 279 } 280 ((StringCharacterIterator *)chariter)->setText(empty); 281 iter1->setText(*chariter, status); 282 if (U_FAILURE(status) 283 || iter1->next(status) != (int32_t)UCOL_NULLORDER) { 284 errln("Empty string should have no CEs."); 285 } 286 delete chariter; 287 delete iter2; 288 delete iter1; 289 } 290 291 /** @bug 4108762 292 * Test for getMaxExpansion() 293 */ 294 void CollationIteratorTest::TestMaxExpansion(/* char* par */) 295 { 296 UErrorCode status = U_ZERO_ERROR; 297 UnicodeString rule("&a < ab < c/aba < d < z < ch"); 298 RuleBasedCollator *coll = new RuleBasedCollator(rule, status); 299 UChar ch = 0; 300 UnicodeString str(ch); 301 302 CollationElementIterator *iter = coll->createCollationElementIterator(str); 303 304 while (ch < 0xFFFF && U_SUCCESS(status)) { 305 int count = 1; 306 uint32_t order; 307 ch ++; 308 UnicodeString str(ch); 309 iter->setText(str, status); 310 order = iter->previous(status); 311 312 /* thai management */ 313 if (CollationElementIterator::isIgnorable(order)) 314 order = iter->previous(status); 315 316 while (U_SUCCESS(status) 317 && iter->previous(status) != (int32_t)UCOL_NULLORDER) 318 { 319 count ++; 320 } 321 322 if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) { 323 errln("Failure at codepoint %d, maximum expansion count < %d\n", 324 ch, count); 325 } 326 } 327 328 delete iter; 329 delete coll; 330 } 331 332 /* 333 * @bug 4157299 334 */ 335 void CollationIteratorTest::TestClearBuffers(/* char* par */) 336 { 337 UErrorCode status = U_ZERO_ERROR; 338 RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status); 339 340 if (c == NULL || U_FAILURE(status)) 341 { 342 errln("Couldn't create a RuleBasedCollator."); 343 delete c; 344 return; 345 } 346 347 UnicodeString source("abcd"); 348 CollationElementIterator *i = c->createCollationElementIterator(source); 349 int32_t e0 = i->next(status); // save the first collation element 350 351 if (U_FAILURE(status)) 352 { 353 errln("call to i->next() failed. err=%s", u_errorName(status)); 354 } 355 else 356 { 357 i->setOffset(3, status); // go to the expanding character 358 359 if (U_FAILURE(status)) 360 { 361 errln("call to i->setOffset(3) failed. err=%s", u_errorName(status)); 362 } 363 else 364 { 365 i->next(status); // but only use up half of it 366 367 if (U_FAILURE(status)) 368 { 369 errln("call to i->next() failed. err=%s", u_errorName(status)); 370 } 371 else 372 { 373 i->setOffset(0, status); // go back to the beginning 374 375 if (U_FAILURE(status)) 376 { 377 errln("call to i->setOffset(0) failed. err=%s", u_errorName(status)); 378 } 379 else 380 { 381 int32_t e = i->next(status); // and get this one again 382 383 if (U_FAILURE(status)) 384 { 385 errln("call to i->next() failed. err=%s", u_errorName(status)); 386 } 387 else if (e != e0) 388 { 389 errln("got 0x%X, expected 0x%X", e, e0); 390 } 391 } 392 } 393 } 394 } 395 396 delete i; 397 delete c; 398 } 399 400 /** 401 * Testing the assignment operator 402 */ 403 void CollationIteratorTest::TestAssignment() 404 { 405 UErrorCode status = U_ZERO_ERROR; 406 RuleBasedCollator *coll = 407 (RuleBasedCollator *)Collator::createInstance(status); 408 409 if (coll == NULL || U_FAILURE(status)) 410 { 411 errln("Couldn't create a default collator."); 412 return; 413 } 414 415 UnicodeString source("abcd"); 416 CollationElementIterator *iter1 = 417 coll->createCollationElementIterator(source); 418 419 CollationElementIterator iter2 = *iter1; 420 421 if (*iter1 != iter2) { 422 errln("Fail collation iterator assignment does not produce the same elements"); 423 } 424 425 CollationElementIterator iter3(*iter1); 426 427 if (*iter1 != iter3) { 428 errln("Fail collation iterator copy constructor does not produce the same elements"); 429 } 430 431 source = CharsToUnicodeString("a\\u0300\\u0325"); 432 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 433 CollationElementIterator *iter4 434 = coll->createCollationElementIterator(source); 435 CollationElementIterator iter5(*iter4); 436 if (*iter4 != iter5) { 437 errln("collation iterator assignment does not produce the same elements"); 438 } 439 iter4->next(status); 440 if (U_FAILURE(status) || *iter4 == iter5) { 441 errln("collation iterator not equal"); 442 } 443 iter5.next(status); 444 if (U_FAILURE(status) || *iter4 != iter5) { 445 errln("collation iterator equal"); 446 } 447 iter4->next(status); 448 if (U_FAILURE(status) || *iter4 == iter5) { 449 errln("collation iterator not equal"); 450 } 451 iter5.next(status); 452 if (U_FAILURE(status) || *iter4 != iter5) { 453 errln("collation iterator equal"); 454 } 455 CollationElementIterator iter6(*iter4); 456 if (*iter4 != iter6) { 457 errln("collation iterator equal"); 458 } 459 iter4->next(status); 460 if (U_FAILURE(status) || *iter4 == iter5) { 461 errln("collation iterator not equal"); 462 } 463 iter5.next(status); 464 if (U_FAILURE(status) || *iter4 != iter5) { 465 errln("collation iterator equal"); 466 } 467 iter4->next(status); 468 if (U_FAILURE(status) || *iter4 == iter5) { 469 errln("collation iterator not equal"); 470 } 471 iter5.next(status); 472 if (U_FAILURE(status) || *iter4 != iter5) { 473 errln("collation iterator equal"); 474 } 475 delete iter1; 476 delete iter4; 477 delete coll; 478 } 479 480 /** 481 * Testing the constructors 482 */ 483 void CollationIteratorTest::TestConstructors() 484 { 485 UErrorCode status = U_ZERO_ERROR; 486 RuleBasedCollator *coll = 487 (RuleBasedCollator *)Collator::createInstance(status); 488 if (coll == NULL || U_FAILURE(status)) 489 { 490 errln("Couldn't create a default collator."); 491 return; 492 } 493 494 // testing protected constructor with character iterator as argument 495 StringCharacterIterator chariter(test1); 496 CollationElementIterator *iter1 = 497 coll->createCollationElementIterator(chariter); 498 if (U_FAILURE(status)) { 499 errln("Couldn't create collation element iterator with character iterator."); 500 return; 501 } 502 CollationElementIterator *iter2 = 503 coll->createCollationElementIterator(test1); 504 505 // initially the 2 collation element iterators should be the same 506 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 507 || *iter2 != *iter1) { 508 errln("CollationElementIterators constructed with the same string data should be the same at the start"); 509 } 510 assertEqual(*iter1, *iter2); 511 512 delete iter1; 513 delete iter2; 514 515 // tests empty strings 516 UnicodeString empty(""); 517 iter1 = coll->createCollationElementIterator(empty); 518 chariter.setText(empty); 519 iter2 = coll->createCollationElementIterator(chariter); 520 if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 521 || *iter2 != *iter1) { 522 errln("CollationElementIterators constructed with the same string data should be the same at the start"); 523 } 524 if (iter1->next(status) != (int32_t)UCOL_NULLORDER) { 525 errln("Empty string should have no CEs."); 526 } 527 if (iter2->next(status) != (int32_t)UCOL_NULLORDER) { 528 errln("Empty string should have no CEs."); 529 } 530 delete iter1; 531 delete iter2; 532 delete coll; 533 } 534 535 /** 536 * Testing the strength order 537 */ 538 void CollationIteratorTest::TestStrengthOrder() 539 { 540 int order = 0x0123ABCD; 541 542 UErrorCode status = U_ZERO_ERROR; 543 RuleBasedCollator *coll = 544 (RuleBasedCollator *)Collator::createInstance(status); 545 if (coll == NULL || U_FAILURE(status)) 546 { 547 errln("Couldn't create a default collator."); 548 return; 549 } 550 551 coll->setStrength(Collator::PRIMARY); 552 CollationElementIterator *iter = 553 coll->createCollationElementIterator(test1); 554 555 if (iter == NULL) { 556 errln("Couldn't create a collation element iterator from default collator"); 557 return; 558 } 559 560 if (iter->strengthOrder(order) != 0x01230000) { 561 errln("Strength order for a primary strength collator should be the first 2 bytes"); 562 return; 563 } 564 565 coll->setStrength(Collator::SECONDARY); 566 if (iter->strengthOrder(order) != 0x0123AB00) { 567 errln("Strength order for a secondary strength collator should be the third byte"); 568 return; 569 } 570 571 coll->setStrength(Collator::TERTIARY); 572 if (iter->strengthOrder(order) != order) { 573 errln("Strength order for a tertiary strength collator should be the third byte"); 574 return; 575 } 576 delete iter; 577 delete coll; 578 } 579 580 /** 581 * Return a string containing all of the collation orders 582 * returned by calls to next on the specified iterator 583 */ 584 UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target) 585 { 586 int32_t order; 587 UErrorCode status = U_ZERO_ERROR; 588 589 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) 590 { 591 target += "0x"; 592 appendHex(order, 8, target); 593 target += " "; 594 } 595 596 return target; 597 } 598 599 void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) 600 { 601 int32_t c1, c2, count = 0; 602 UErrorCode status = U_ZERO_ERROR; 603 604 do 605 { 606 c1 = i1.next(status); 607 c2 = i2.next(status); 608 609 if (c1 != c2) 610 { 611 errln(" %d: strength(0x%X) != strength(0x%X)", count, c1, c2); 612 break; 613 } 614 615 count += 1; 616 } 617 while (c1 != CollationElementIterator::NULLORDER); 618 } 619 620 void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) 621 { 622 if (exec) 623 { 624 logln("Collation Iteration Tests: "); 625 } 626 627 if(en_us) { 628 switch (index) 629 { 630 case 0: name = "TestPrevious"; if (exec) TestPrevious(/* par */); break; 631 case 1: name = "TestOffset"; if (exec) TestOffset(/* par */); break; 632 case 2: name = "TestSetText"; if (exec) TestSetText(/* par */); break; 633 case 3: name = "TestMaxExpansion"; if (exec) TestMaxExpansion(/* par */); break; 634 case 4: name = "TestClearBuffers"; if (exec) TestClearBuffers(/* par */); break; 635 case 5: name = "TestUnicodeChar"; if (exec) TestUnicodeChar(/* par */); break; 636 case 6: name = "TestAssignment"; if (exec) TestAssignment(/* par */); break; 637 case 7: name = "TestConstructors"; if (exec) TestConstructors(/* par */); break; 638 case 8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break; 639 default: name = ""; break; 640 } 641 } else { 642 dataerrln("Class iterator not instantiated"); 643 name = ""; 644 } 645 } 646 647 #endif /* #if !UCONFIG_NO_COLLATION */ 648