1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_COLLATION 10 11 #include "unicode/coll.h" 12 #include "unicode/tblcoll.h" 13 #include "unicode/unistr.h" 14 #include "unicode/sortkey.h" 15 #include "regcoll.h" 16 #include "sfwdchit.h" 17 18 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0])) 19 20 CollationRegressionTest::CollationRegressionTest() 21 { 22 UErrorCode status = U_ZERO_ERROR; 23 24 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); 25 if(U_FAILURE(status)) { 26 delete en_us; 27 en_us = 0; 28 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 29 return; 30 } 31 } 32 33 CollationRegressionTest::~CollationRegressionTest() 34 { 35 delete en_us; 36 } 37 38 39 // @bug 4048446 40 // 41 // CollationElementIterator.reset() doesn't work 42 // 43 void CollationRegressionTest::Test4048446(/* char* par */) 44 { 45 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 46 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 47 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); 48 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); 49 UErrorCode status = U_ZERO_ERROR; 50 51 if (i1 == NULL|| i2 == NULL) 52 { 53 errln("Could not create CollationElementIterator's"); 54 delete i1; 55 delete i2; 56 return; 57 } 58 59 while (i1->next(status) != CollationElementIterator::NULLORDER) 60 { 61 if (U_FAILURE(status)) 62 { 63 errln("error calling next()"); 64 65 delete i1; 66 delete i2; 67 return; 68 } 69 } 70 71 i1->reset(); 72 73 assertEqual(*i1, *i2); 74 75 delete i1; 76 delete i2; 77 } 78 79 // @bug 4051866 80 // 81 // Collator -> rules -> Collator round-trip broken for expanding characters 82 // 83 void CollationRegressionTest::Test4051866(/* char* par */) 84 { 85 /* 86 RuleBasedCollator c1 = new RuleBasedCollator("< o " 87 +"& oe ,o\u3080" 88 +"& oe ,\u1530 ,O" 89 +"& OE ,O\u3080" 90 +"& OE ,\u1520" 91 +"< p ,P"); 92 */ 93 94 UnicodeString rules; 95 UErrorCode status = U_ZERO_ERROR; 96 97 rules += "< o "; 98 rules += "& oe ,o"; 99 rules += (UChar)0x3080; 100 rules += "& oe ,"; 101 rules += (UChar)0x1530; 102 rules += " ,O"; 103 rules += "& OE ,O"; 104 rules += (UChar)0x3080; 105 rules += "& OE ,"; 106 rules += (UChar)0x1520; 107 rules += "< p ,P"; 108 109 // Build a collator containing expanding characters 110 RuleBasedCollator *c1 = new RuleBasedCollator(rules, status); 111 112 // Build another using the rules from the first 113 RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status); 114 115 // Make sure they're the same 116 if (!(c1->getRules() == c2->getRules())) 117 { 118 errln("Rules are not equal"); 119 } 120 121 delete c2; 122 delete c1; 123 } 124 125 // @bug 4053636 126 // 127 // Collator thinks "black-bird" == "black" 128 // 129 void CollationRegressionTest::Test4053636(/* char* par */) 130 { 131 if (en_us->equals("black_bird", "black")) 132 { 133 errln("black-bird == black"); 134 } 135 } 136 137 // @bug 4054238 138 // 139 // CollationElementIterator will not work correctly if the associated 140 // Collator object's mode is changed 141 // 142 void CollationRegressionTest::Test4054238(/* char* par */) 143 { 144 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; 145 const UnicodeString test3(chars3); 146 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 147 148 // NOTE: The Java code uses en_us to create the CollationElementIterators 149 // but I'm pretty sure that's wrong, so I've changed this to use c. 150 UErrorCode status = U_ZERO_ERROR; 151 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 152 CollationElementIterator *i1 = c->createCollationElementIterator(test3); 153 delete i1; 154 delete c; 155 } 156 157 // @bug 4054734 158 // 159 // Collator::IDENTICAL documented but not implemented 160 // 161 void CollationRegressionTest::Test4054734(/* char* par */) 162 { 163 /* 164 Here's the original Java: 165 166 String[] decomp = { 167 "\u0001", "<", "\u0002", 168 "\u0001", "=", "\u0001", 169 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 170 "\u00C0", "=", "A\u0300" // Decomp should make these equal 171 }; 172 173 String[] nodecomp = { 174 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 175 }; 176 */ 177 178 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = 179 { 180 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, 181 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, 182 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, 183 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} 184 }; 185 186 187 UErrorCode status = U_ZERO_ERROR; 188 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 189 190 c->setStrength(Collator::IDENTICAL); 191 192 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 193 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); 194 195 delete c; 196 } 197 198 // @bug 4054736 199 // 200 // Full Decomposition mode not implemented 201 // 202 void CollationRegressionTest::Test4054736(/* char* par */) 203 { 204 UErrorCode status = U_ZERO_ERROR; 205 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 206 207 c->setStrength(Collator::SECONDARY); 208 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 209 210 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 211 { 212 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed 213 }; 214 215 compareArray(*c, tests, ARRAY_LENGTH(tests)); 216 217 delete c; 218 } 219 220 // @bug 4058613 221 // 222 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean 223 // 224 void CollationRegressionTest::Test4058613(/* char* par */) 225 { 226 // Creating a default collator doesn't work when Korean is the default 227 // locale 228 229 Locale oldDefault = Locale::getDefault(); 230 UErrorCode status = U_ZERO_ERROR; 231 232 Locale::setDefault(Locale::getKorean(), status); 233 234 if (U_FAILURE(status)) 235 { 236 errln("Could not set default locale to Locale::KOREAN"); 237 return; 238 } 239 240 Collator *c = NULL; 241 242 c = Collator::createInstance("en_US", status); 243 244 if (c == NULL || U_FAILURE(status)) 245 { 246 errln("Could not create a Korean collator"); 247 Locale::setDefault(oldDefault, status); 248 delete c; 249 return; 250 } 251 252 // Since the fix to this bug was to turn off decomposition for Korean collators, 253 // ensure that's what we got 254 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) 255 { 256 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); 257 } 258 259 delete c; 260 261 Locale::setDefault(oldDefault, status); 262 } 263 264 // @bug 4059820 265 // 266 // RuleBasedCollator.getRules does not return the exact pattern as input 267 // for expanding character sequences 268 // 269 void CollationRegressionTest::Test4059820(/* char* par */) 270 { 271 UErrorCode status = U_ZERO_ERROR; 272 273 RuleBasedCollator *c = NULL; 274 UnicodeString rules = "< a < b , c/a < d < z"; 275 276 c = new RuleBasedCollator(rules, status); 277 278 if (c == NULL || U_FAILURE(status)) 279 { 280 errln("Failure building a collator."); 281 delete c; 282 return; 283 } 284 285 if ( c->getRules().indexOf("c/a") == -1) 286 { 287 errln("returned rules do not contain 'c/a'"); 288 } 289 290 delete c; 291 } 292 293 // @bug 4060154 294 // 295 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 296 // 297 void CollationRegressionTest::Test4060154(/* char* par */) 298 { 299 UErrorCode status = U_ZERO_ERROR; 300 UnicodeString rules; 301 302 rules += "< g, G < h, H < i, I < j, J"; 303 rules += " & H < "; 304 rules += (UChar)0x0131; 305 rules += ", "; 306 rules += (UChar)0x0130; 307 rules += ", i, I"; 308 309 RuleBasedCollator *c = NULL; 310 311 c = new RuleBasedCollator(rules, status); 312 313 if (c == NULL || U_FAILURE(status)) 314 { 315 errln("failure building collator."); 316 delete c; 317 return; 318 } 319 320 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 321 322 /* 323 String[] tertiary = { 324 "A", "<", "B", 325 "H", "<", "\u0131", 326 "H", "<", "I", 327 "\u0131", "<", "\u0130", 328 "\u0130", "<", "i", 329 "\u0130", ">", "H", 330 }; 331 */ 332 333 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = 334 { 335 {0x41, 0}, {0x3c, 0}, {0x42, 0}, 336 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, 337 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 338 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, 339 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, 340 {0x0130, 0}, {0x3e, 0}, {0x48, 0} 341 }; 342 343 c->setStrength(Collator::TERTIARY); 344 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); 345 346 /* 347 String[] secondary = { 348 "H", "<", "I", 349 "\u0131", "=", "\u0130", 350 }; 351 */ 352 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = 353 { 354 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 355 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} 356 }; 357 358 c->setStrength(Collator::PRIMARY); 359 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); 360 361 delete c; 362 } 363 364 // @bug 4062418 365 // 366 // Secondary/Tertiary comparison incorrect in French Secondary 367 // 368 void CollationRegressionTest::Test4062418(/* char* par */) 369 { 370 UErrorCode status = U_ZERO_ERROR; 371 372 RuleBasedCollator *c = NULL; 373 374 c = (RuleBasedCollator *) Collator::createInstance(Locale::getFrance(), status); 375 376 if (c == NULL || U_FAILURE(status)) 377 { 378 errln("Failed to create collator for Locale::getFrance()"); 379 delete c; 380 return; 381 } 382 383 c->setStrength(Collator::SECONDARY); 384 385 /* 386 String[] tests = { 387 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 388 }; 389 */ 390 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 391 { 392 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} 393 }; 394 395 compareArray(*c, tests, ARRAY_LENGTH(tests)); 396 397 delete c; 398 } 399 400 // @bug 4065540 401 // 402 // Collator::compare() method broken if either string contains spaces 403 // 404 void CollationRegressionTest::Test4065540(/* char* par */) 405 { 406 if (en_us->compare("abcd e", "abcd f") == 0) 407 { 408 errln("'abcd e' == 'abcd f'"); 409 } 410 } 411 412 // @bug 4066189 413 // 414 // Unicode characters need to be recursively decomposed to get the 415 // correct result. For example, 416 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 417 // 418 void CollationRegressionTest::Test4066189(/* char* par */) 419 { 420 static const UChar chars1[] = {0x1EB1, 0}; 421 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; 422 const UnicodeString test1(chars1); 423 const UnicodeString test2(chars2); 424 UErrorCode status = U_ZERO_ERROR; 425 426 // NOTE: The java code used en_us to create the 427 // CollationElementIterator's. I'm pretty sure that 428 // was wrong, so I've change the code to use c1 and c2 429 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); 430 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 431 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); 432 433 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); 434 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 435 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); 436 437 assertEqual(*i1, *i2); 438 439 delete i2; 440 delete c2; 441 delete i1; 442 delete c1; 443 } 444 445 // @bug 4066696 446 // 447 // French secondary collation checking at the end of compare iteration fails 448 // 449 void CollationRegressionTest::Test4066696(/* char* par */) 450 { 451 UErrorCode status = U_ZERO_ERROR; 452 RuleBasedCollator *c = NULL; 453 454 c = (RuleBasedCollator *)Collator::createInstance(Locale::getFrance(), status); 455 456 if (c == NULL || U_FAILURE(status)) 457 { 458 errln("Failure creating collator for Locale::getFrance()"); 459 delete c; 460 return; 461 } 462 463 c->setStrength(Collator::SECONDARY); 464 465 /* 466 String[] tests = { 467 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 468 }; 469 470 should be: 471 472 String[] tests = { 473 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute 474 }; 475 476 */ 477 478 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 479 { 480 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} 481 }; 482 483 compareArray(*c, tests, ARRAY_LENGTH(tests)); 484 485 delete c; 486 } 487 488 // @bug 4076676 489 // 490 // Bad canonicalization of same-class combining characters 491 // 492 void CollationRegressionTest::Test4076676(/* char* par */) 493 { 494 // These combining characters are all in the same class, so they should not 495 // be reordered, and they should compare as unequal. 496 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; 497 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; 498 499 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 500 c->setStrength(Collator::TERTIARY); 501 502 if (c->compare(s1,s2) == 0) 503 { 504 errln("Same-class combining chars were reordered"); 505 } 506 507 delete c; 508 } 509 510 // @bug 4079231 511 // 512 // RuleBasedCollator::operator==(NULL) throws NullPointerException 513 // 514 void CollationRegressionTest::Test4079231(/* char* par */) 515 { 516 // I don't think there's any way to write this test 517 // in C++. The following is equivalent to the Java, 518 // but doesn't compile 'cause NULL can't be converted 519 // to Collator& 520 // 521 // if (en_us->operator==(NULL)) 522 // { 523 // errln("en_us->operator==(NULL) returned TRUE"); 524 // } 525 526 /* 527 try { 528 if (en_us->equals(null)) { 529 errln("en_us->equals(null) returned true"); 530 } 531 } 532 catch (Exception e) { 533 errln("en_us->equals(null) threw " + e.toString()); 534 } 535 */ 536 } 537 538 // @bug 4078588 539 // 540 // RuleBasedCollator breaks on "< a < bb" rule 541 // 542 void CollationRegressionTest::Test4078588(/* char *par */) 543 { 544 UErrorCode status = U_ZERO_ERROR; 545 RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status); 546 547 if (rbc == NULL || U_FAILURE(status)) 548 { 549 errln("Failed to create RuleBasedCollator."); 550 delete rbc; 551 return; 552 } 553 554 Collator::EComparisonResult result = rbc->compare("a","bb"); 555 556 if (result != Collator::LESS) 557 { 558 errln((UnicodeString)"Compare(a,bb) returned " + (int)result 559 + (UnicodeString)"; expected -1"); 560 } 561 562 delete rbc; 563 } 564 565 // @bug 4081866 566 // 567 // Combining characters in different classes not reordered properly. 568 // 569 void CollationRegressionTest::Test4081866(/* char* par */) 570 { 571 // These combining characters are all in different classes, 572 // so they should be reordered and the strings should compare as equal. 573 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; 574 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; 575 576 UErrorCode status = U_ZERO_ERROR; 577 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 578 c->setStrength(Collator::TERTIARY); 579 580 // Now that the default collators are set to NO_DECOMPOSITION 581 // (as a result of fixing bug 4114077), we must set it explicitly 582 // when we're testing reordering behavior. -- lwerner, 5/5/98 583 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 584 585 if (c->compare(s1,s2) != 0) 586 { 587 errln("Combining chars were not reordered"); 588 } 589 590 delete c; 591 } 592 593 // @bug 4087241 594 // 595 // string comparison errors in Scandinavian collators 596 // 597 void CollationRegressionTest::Test4087241(/* char* par */) 598 { 599 UErrorCode status = U_ZERO_ERROR; 600 Locale da_DK("da", "DK"); 601 RuleBasedCollator *c = NULL; 602 603 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); 604 605 if (c == NULL || U_FAILURE(status)) 606 { 607 errln("Failed to create collator for da_DK locale"); 608 delete c; 609 return; 610 } 611 612 c->setStrength(Collator::SECONDARY); 613 614 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 615 { 616 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae 617 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ring 618 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut 619 }; 620 621 compareArray(*c, tests, ARRAY_LENGTH(tests)); 622 623 delete c; 624 } 625 626 // @bug 4087243 627 // 628 // CollationKey takes ignorable strings into account when it shouldn't 629 // 630 void CollationRegressionTest::Test4087243(/* char* par */) 631 { 632 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 633 c->setStrength(Collator::TERTIARY); 634 635 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 636 { 637 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A 638 }; 639 640 compareArray(*c, tests, ARRAY_LENGTH(tests)); 641 642 delete c; 643 } 644 645 // @bug 4092260 646 // 647 // Mu/micro conflict 648 // Micro symbol and greek lowercase letter Mu should sort identically 649 // 650 void CollationRegressionTest::Test4092260(/* char* par */) 651 { 652 UErrorCode status = U_ZERO_ERROR; 653 Locale el("el", ""); 654 Collator *c = NULL; 655 656 c = Collator::createInstance(el, status); 657 658 if (c == NULL || U_FAILURE(status)) 659 { 660 errln("Failed to create collator for el locale."); 661 delete c; 662 return; 663 } 664 665 // These now have tertiary differences in UCA 666 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 667 668 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 669 { 670 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} 671 }; 672 673 compareArray(*c, tests, ARRAY_LENGTH(tests)); 674 675 delete c; 676 } 677 678 // @bug 4095316 679 // 680 void CollationRegressionTest::Test4095316(/* char* par */) 681 { 682 UErrorCode status = U_ZERO_ERROR; 683 Locale el_GR("el", "GR"); 684 Collator *c = Collator::createInstance(el_GR, status); 685 686 if (c == NULL || U_FAILURE(status)) 687 { 688 errln("Failed to create collator for el_GR locale"); 689 delete c; 690 return; 691 } 692 // These now have tertiary differences in UCA 693 //c->setStrength(Collator::TERTIARY); 694 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 695 696 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 697 { 698 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} 699 }; 700 701 compareArray(*c, tests, ARRAY_LENGTH(tests)); 702 703 delete c; 704 } 705 706 // @bug 4101940 707 // 708 void CollationRegressionTest::Test4101940(/* char* par */) 709 { 710 UErrorCode status = U_ZERO_ERROR; 711 RuleBasedCollator *c = NULL; 712 UnicodeString rules = "< a < b"; 713 UnicodeString nothing = ""; 714 715 c = new RuleBasedCollator(rules, status); 716 717 if (c == NULL || U_FAILURE(status)) 718 { 719 errln("Failed to create RuleBasedCollator"); 720 delete c; 721 return; 722 } 723 724 CollationElementIterator *i = c->createCollationElementIterator(nothing); 725 i->reset(); 726 727 if (i->next(status) != CollationElementIterator::NULLORDER) 728 { 729 errln("next did not return NULLORDER"); 730 } 731 732 delete i; 733 delete c; 734 } 735 736 // @bug 4103436 737 // 738 // Collator::compare not handling spaces properly 739 // 740 void CollationRegressionTest::Test4103436(/* char* par */) 741 { 742 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 743 c->setStrength(Collator::TERTIARY); 744 745 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 746 { 747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, 748 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} 749 }; 750 751 compareArray(*c, tests, ARRAY_LENGTH(tests)); 752 753 delete c; 754 } 755 756 // @bug 4114076 757 // 758 // Collation not Unicode conformant with Hangul syllables 759 // 760 void CollationRegressionTest::Test4114076(/* char* par */) 761 { 762 UErrorCode status = U_ZERO_ERROR; 763 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 764 c->setStrength(Collator::TERTIARY); 765 766 // 767 // With Canonical decomposition, Hangul syllables should get decomposed 768 // into Jamo, but Jamo characters should not be decomposed into 769 // conjoining Jamo 770 // 771 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 772 { 773 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} 774 }; 775 776 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 777 compareArray(*c, test1, ARRAY_LENGTH(test1)); 778 779 // From UTR #15: 780 // *In earlier versions of Unicode, jamo characters like ksf 781 // had compatibility mappings to kf + sf. These mappings were 782 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) 783 // That is, the following test is obsolete as of 2.1.9 784 785 //obsolete- // With Full decomposition, it should go all the way down to 786 //obsolete- // conjoining Jamo characters. 787 //obsolete- // 788 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 789 //obsolete- { 790 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} 791 //obsolete- }; 792 //obsolete- 793 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); 794 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); 795 796 delete c; 797 } 798 799 800 // @bug 4124632 801 // 802 // Collator::getCollationKey was hanging on certain character sequences 803 // 804 void CollationRegressionTest::Test4124632(/* char* par */) 805 { 806 UErrorCode status = U_ZERO_ERROR; 807 Collator *coll = NULL; 808 809 coll = Collator::createInstance(Locale::getJapan(), status); 810 811 if (coll == NULL || U_FAILURE(status)) 812 { 813 errln("Failed to create collator for Locale::JAPAN"); 814 delete coll; 815 return; 816 } 817 818 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; 819 CollationKey key; 820 821 coll->getCollationKey(test, key, status); 822 823 if (key.isBogus() || U_FAILURE(status)) 824 { 825 errln("CollationKey creation failed."); 826 } 827 828 delete coll; 829 } 830 831 // @bug 4132736 832 // 833 // sort order of french words with multiple accents has errors 834 // 835 void CollationRegressionTest::Test4132736(/* char* par */) 836 { 837 UErrorCode status = U_ZERO_ERROR; 838 839 Collator *c = NULL; 840 841 c = Collator::createInstance(Locale::getFrance(), status); 842 c->setStrength(Collator::TERTIARY); 843 844 if (c == NULL || U_FAILURE(status)) 845 { 846 errln("Failed to create a collator for Locale::getFrance()"); 847 delete c; 848 return; 849 } 850 851 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 852 { 853 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, 854 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} 855 }; 856 857 compareArray(*c, test1, ARRAY_LENGTH(test1)); 858 859 delete c; 860 } 861 862 // @bug 4133509 863 // 864 // The sorting using java.text.CollationKey is not in the exact order 865 // 866 void CollationRegressionTest::Test4133509(/* char* par */) 867 { 868 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 869 { 870 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, 871 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, 872 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} 873 }; 874 875 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); 876 } 877 878 // @bug 4114077 879 // 880 // Collation with decomposition off doesn't work for Europe 881 // 882 void CollationRegressionTest::Test4114077(/* char* par */) 883 { 884 // Ensure that we get the same results with decomposition off 885 // as we do with it on.... 886 887 UErrorCode status = U_ZERO_ERROR; 888 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 889 c->setStrength(Collator::TERTIARY); 890 891 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 892 { 893 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent 894 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, 895 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, 896 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute 897 // -> a, ring, acute 898 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal 899 }; 900 901 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 902 compareArray(*c, test1, ARRAY_LENGTH(test1)); 903 904 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 905 { 906 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal 907 }; 908 909 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 910 compareArray(*c, test2, ARRAY_LENGTH(test2)); 911 912 delete c; 913 } 914 915 // @bug 4141640 916 // 917 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 918 // 919 void CollationRegressionTest::Test4141640(/* char* par */) 920 { 921 // 922 // Rather than just creating a Swedish collator, we might as well 923 // try to instantiate one for every locale available on the system 924 // in order to prevent this sort of bug from cropping up in the future 925 // 926 UErrorCode status = U_ZERO_ERROR; 927 int32_t i, localeCount; 928 const Locale *locales = Locale::getAvailableLocales(localeCount); 929 930 for (i = 0; i < localeCount; i += 1) 931 { 932 Collator *c = NULL; 933 934 status = U_ZERO_ERROR; 935 c = Collator::createInstance(locales[i], status); 936 937 if (c == NULL || U_FAILURE(status)) 938 { 939 UnicodeString msg, localeName; 940 941 msg += "Could not create collator for locale "; 942 msg += locales[i].getName(); 943 944 errln(msg); 945 } 946 947 delete c; 948 } 949 } 950 951 // @bug 4139572 952 // 953 // getCollationKey throws exception for spanish text 954 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 955 // 956 void CollationRegressionTest::Test4139572(/* char* par */) 957 { 958 // 959 // Code pasted straight from the bug report 960 // (and then translated to C++ ;-) 961 // 962 // create spanish locale and collator 963 UErrorCode status = U_ZERO_ERROR; 964 Locale l("es", "es"); 965 Collator *col = NULL; 966 967 col = Collator::createInstance(l, status); 968 969 if (col == NULL || U_FAILURE(status)) 970 { 971 errln("Failed to create a collator for es_es locale."); 972 delete col; 973 return; 974 } 975 976 CollationKey key; 977 978 // this spanish phrase kills it! 979 col->getCollationKey("Nombre De Objeto", key, status); 980 981 if (key.isBogus() || U_FAILURE(status)) 982 { 983 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); 984 } 985 986 delete col; 987 } 988 /* HSYS : RuleBasedCollator::compare() performance enhancements 989 compare() does not create CollationElementIterator() anymore.*/ 990 991 class My4146160Collator : public RuleBasedCollator 992 { 993 public: 994 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); 995 ~My4146160Collator(); 996 997 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; 998 999 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; 1000 1001 static int32_t count; 1002 }; 1003 1004 int32_t My4146160Collator::count = 0; 1005 1006 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) 1007 : RuleBasedCollator(rbc.getRules(), status) 1008 { 1009 } 1010 1011 My4146160Collator::~My4146160Collator() 1012 { 1013 } 1014 1015 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const 1016 { 1017 count += 1; 1018 return RuleBasedCollator::createCollationElementIterator(text); 1019 } 1020 1021 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const 1022 { 1023 count += 1; 1024 return RuleBasedCollator::createCollationElementIterator(text); 1025 } 1026 1027 // @bug 4146160 1028 // 1029 // RuleBasedCollator doesn't use createCollationElementIterator internally 1030 // 1031 void CollationRegressionTest::Test4146160(/* char* par */) 1032 { 1033 #if 0 1034 // 1035 // Use a custom collator class whose createCollationElementIterator 1036 // methods increment a count.... 1037 // 1038 UErrorCode status = U_ZERO_ERROR; 1039 CollationKey key; 1040 1041 My4146160Collator::count = 0; 1042 My4146160Collator *mc = NULL; 1043 1044 mc = new My4146160Collator(*en_us, status); 1045 1046 if (mc == NULL || U_FAILURE(status)) 1047 { 1048 errln("Failed to create a My4146160Collator."); 1049 delete mc; 1050 return; 1051 } 1052 1053 mc->getCollationKey("1", key, status); 1054 1055 if (key.isBogus() || U_FAILURE(status)) 1056 { 1057 errln("Failure to get a CollationKey from a My4146160Collator."); 1058 delete mc; 1059 return; 1060 } 1061 1062 if (My4146160Collator::count < 1) 1063 { 1064 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); 1065 } 1066 1067 My4146160Collator::count = 0; 1068 mc->compare("1", "2"); 1069 1070 if (My4146160Collator::count < 1) 1071 { 1072 errln("My4146160Collator::createtCollationElementIterator not called for compare"); 1073 } 1074 1075 delete mc; 1076 #endif 1077 } 1078 void CollationRegressionTest::compareArray(Collator &c, 1079 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], 1080 int32_t testCount) 1081 { 1082 int32_t i; 1083 Collator::EComparisonResult expectedResult = Collator::EQUAL; 1084 1085 for (i = 0; i < testCount; i += 3) 1086 { 1087 UnicodeString source(tests[i]); 1088 UnicodeString comparison(tests[i + 1]); 1089 UnicodeString target(tests[i + 2]); 1090 1091 if (comparison == "<") 1092 { 1093 expectedResult = Collator::LESS; 1094 } 1095 else if (comparison == ">") 1096 { 1097 expectedResult = Collator::GREATER; 1098 } 1099 else if (comparison == "=") 1100 { 1101 expectedResult = Collator::EQUAL; 1102 } 1103 else 1104 { 1105 UnicodeString bogus1("Bogus comparison string \""); 1106 UnicodeString bogus2("\""); 1107 errln(bogus1 + comparison + bogus2); 1108 } 1109 1110 Collator::EComparisonResult compareResult = c.compare(source, target); 1111 1112 CollationKey sourceKey, targetKey; 1113 UErrorCode status = U_ZERO_ERROR; 1114 1115 c.getCollationKey(source, sourceKey, status); 1116 1117 if (U_FAILURE(status)) 1118 { 1119 errln("Couldn't get collationKey for source"); 1120 continue; 1121 } 1122 1123 c.getCollationKey(target, targetKey, status); 1124 1125 if (U_FAILURE(status)) 1126 { 1127 errln("Couldn't get collationKey for target"); 1128 continue; 1129 } 1130 1131 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); 1132 1133 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); 1134 1135 } 1136 } 1137 1138 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) 1139 { 1140 int32_t c1, c2, count = 0; 1141 UErrorCode status = U_ZERO_ERROR; 1142 1143 do 1144 { 1145 c1 = i1.next(status); 1146 c2 = i2.next(status); 1147 1148 if (c1 != c2) 1149 { 1150 UnicodeString msg, msg1(" "); 1151 1152 msg += msg1 + count; 1153 msg += ": strength(0x"; 1154 appendHex(c1, 8, msg); 1155 msg += ") != strength(0x"; 1156 appendHex(c2, 8, msg); 1157 msg += ")"; 1158 1159 errln(msg); 1160 break; 1161 } 1162 1163 count += 1; 1164 } 1165 while (c1 != CollationElementIterator::NULLORDER); 1166 } 1167 1168 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) 1169 { 1170 if (exec) 1171 { 1172 logln("Collation Regression Tests: "); 1173 } 1174 1175 if(en_us) { 1176 switch (index) 1177 { 1178 case 0: name = "Test4048446"; if (exec) Test4048446(/* par */); break; 1179 case 1: name = "Test4051866"; if (exec) Test4051866(/* par */); break; 1180 case 2: name = "Test4053636"; if (exec) Test4053636(/* par */); break; 1181 case 3: name = "Test4054238"; if (exec) Test4054238(/* par */); break; 1182 case 4: name = "Test4054734"; if (exec) Test4054734(/* par */); break; 1183 case 5: name = "Test4054736"; if (exec) Test4054736(/* par */); break; 1184 case 6: name = "Test4058613"; if (exec) Test4058613(/* par */); break; 1185 case 7: name = "Test4059820"; if (exec) Test4059820(/* par */); break; 1186 case 8: name = "Test4060154"; if (exec) Test4060154(/* par */); break; 1187 case 9: name = "Test4062418"; if (exec) Test4062418(/* par */); break; 1188 case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break; 1189 case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break; 1190 case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break; 1191 case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break; 1192 case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break; 1193 case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break; 1194 case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break; 1195 case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break; 1196 case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break; 1197 case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break; 1198 case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break; 1199 case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break; 1200 case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break; 1201 case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break; 1202 case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break; 1203 case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break; 1204 case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break; 1205 case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break; 1206 case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break; 1207 case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break; 1208 case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break; 1209 default: name = ""; break; 1210 } 1211 } else { 1212 dataerrln("Class collator not instantiated"); 1213 name = ""; 1214 } 1215 } 1216 1217 #endif /* #if !UCONFIG_NO_COLLATION */ 1218