1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_COLLATION 10 11 #include "unicode/coll.h" 12 #include "unicode/tblcoll.h" 13 #include "unicode/unistr.h" 14 #include "unicode/sortkey.h" 15 #include "regcoll.h" 16 #include "sfwdchit.h" 17 #include "testutil.h" 18 #include "cmemory.h" 19 20 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0])) 21 22 CollationRegressionTest::CollationRegressionTest() 23 { 24 UErrorCode status = U_ZERO_ERROR; 25 26 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); 27 if(U_FAILURE(status)) { 28 delete en_us; 29 en_us = 0; 30 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 31 return; 32 } 33 } 34 35 CollationRegressionTest::~CollationRegressionTest() 36 { 37 delete en_us; 38 } 39 40 41 // @bug 4048446 42 // 43 // CollationElementIterator.reset() doesn't work 44 // 45 void CollationRegressionTest::Test4048446(/* char* par */) 46 { 47 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 48 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 49 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); 50 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); 51 UErrorCode status = U_ZERO_ERROR; 52 53 if (i1 == NULL|| i2 == NULL) 54 { 55 errln("Could not create CollationElementIterator's"); 56 delete i1; 57 delete i2; 58 return; 59 } 60 61 while (i1->next(status) != CollationElementIterator::NULLORDER) 62 { 63 if (U_FAILURE(status)) 64 { 65 errln("error calling next()"); 66 67 delete i1; 68 delete i2; 69 return; 70 } 71 } 72 73 i1->reset(); 74 75 assertEqual(*i1, *i2); 76 77 delete i1; 78 delete i2; 79 } 80 81 // @bug 4051866 82 // 83 // Collator -> rules -> Collator round-trip broken for expanding characters 84 // 85 void CollationRegressionTest::Test4051866(/* char* par */) 86 { 87 /* 88 RuleBasedCollator c1 = new RuleBasedCollator("< o " 89 +"& oe ,o\u3080" 90 +"& oe ,\u1530 ,O" 91 +"& OE ,O\u3080" 92 +"& OE ,\u1520" 93 +"< p ,P"); 94 */ 95 96 UnicodeString rules; 97 UErrorCode status = U_ZERO_ERROR; 98 99 rules += "< o "; 100 rules += "& oe ,o"; 101 rules += (UChar)0x3080; 102 rules += "& oe ,"; 103 rules += (UChar)0x1530; 104 rules += " ,O"; 105 rules += "& OE ,O"; 106 rules += (UChar)0x3080; 107 rules += "& OE ,"; 108 rules += (UChar)0x1520; 109 rules += "< p ,P"; 110 111 // Build a collator containing expanding characters 112 RuleBasedCollator *c1 = new RuleBasedCollator(rules, status); 113 114 // Build another using the rules from the first 115 RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status); 116 117 // Make sure they're the same 118 if (!(c1->getRules() == c2->getRules())) 119 { 120 errln("Rules are not equal"); 121 } 122 123 delete c2; 124 delete c1; 125 } 126 127 // @bug 4053636 128 // 129 // Collator thinks "black-bird" == "black" 130 // 131 void CollationRegressionTest::Test4053636(/* char* par */) 132 { 133 if (en_us->equals("black_bird", "black")) 134 { 135 errln("black-bird == black"); 136 } 137 } 138 139 // @bug 4054238 140 // 141 // CollationElementIterator will not work correctly if the associated 142 // Collator object's mode is changed 143 // 144 void CollationRegressionTest::Test4054238(/* char* par */) 145 { 146 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; 147 const UnicodeString test3(chars3); 148 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 149 150 // NOTE: The Java code uses en_us to create the CollationElementIterators 151 // but I'm pretty sure that's wrong, so I've changed this to use c. 152 UErrorCode status = U_ZERO_ERROR; 153 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 154 CollationElementIterator *i1 = c->createCollationElementIterator(test3); 155 delete i1; 156 delete c; 157 } 158 159 // @bug 4054734 160 // 161 // Collator::IDENTICAL documented but not implemented 162 // 163 void CollationRegressionTest::Test4054734(/* char* par */) 164 { 165 /* 166 Here's the original Java: 167 168 String[] decomp = { 169 "\u0001", "<", "\u0002", 170 "\u0001", "=", "\u0001", 171 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 172 "\u00C0", "=", "A\u0300" // Decomp should make these equal 173 }; 174 175 String[] nodecomp = { 176 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 177 }; 178 */ 179 180 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = 181 { 182 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, 183 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, 184 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, 185 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} 186 }; 187 188 189 UErrorCode status = U_ZERO_ERROR; 190 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 191 192 c->setStrength(Collator::IDENTICAL); 193 194 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 195 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); 196 197 delete c; 198 } 199 200 // @bug 4054736 201 // 202 // Full Decomposition mode not implemented 203 // 204 void CollationRegressionTest::Test4054736(/* char* par */) 205 { 206 UErrorCode status = U_ZERO_ERROR; 207 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 208 209 c->setStrength(Collator::SECONDARY); 210 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 211 212 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 213 { 214 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed 215 }; 216 217 compareArray(*c, tests, ARRAY_LENGTH(tests)); 218 219 delete c; 220 } 221 222 // @bug 4058613 223 // 224 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean 225 // 226 void CollationRegressionTest::Test4058613(/* char* par */) 227 { 228 // Creating a default collator doesn't work when Korean is the default 229 // locale 230 231 Locale oldDefault = Locale::getDefault(); 232 UErrorCode status = U_ZERO_ERROR; 233 234 Locale::setDefault(Locale::getKorean(), status); 235 236 if (U_FAILURE(status)) 237 { 238 errln("Could not set default locale to Locale::KOREAN"); 239 return; 240 } 241 242 Collator *c = NULL; 243 244 c = Collator::createInstance("en_US", status); 245 246 if (c == NULL || U_FAILURE(status)) 247 { 248 errln("Could not create a Korean collator"); 249 Locale::setDefault(oldDefault, status); 250 delete c; 251 return; 252 } 253 254 // Since the fix to this bug was to turn off decomposition for Korean collators, 255 // ensure that's what we got 256 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) 257 { 258 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); 259 } 260 261 delete c; 262 263 Locale::setDefault(oldDefault, status); 264 } 265 266 // @bug 4059820 267 // 268 // RuleBasedCollator.getRules does not return the exact pattern as input 269 // for expanding character sequences 270 // 271 void CollationRegressionTest::Test4059820(/* char* par */) 272 { 273 UErrorCode status = U_ZERO_ERROR; 274 275 RuleBasedCollator *c = NULL; 276 UnicodeString rules = "< a < b , c/a < d < z"; 277 278 c = new RuleBasedCollator(rules, status); 279 280 if (c == NULL || U_FAILURE(status)) 281 { 282 errln("Failure building a collator."); 283 delete c; 284 return; 285 } 286 287 if ( c->getRules().indexOf("c/a") == -1) 288 { 289 errln("returned rules do not contain 'c/a'"); 290 } 291 292 delete c; 293 } 294 295 // @bug 4060154 296 // 297 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 298 // 299 void CollationRegressionTest::Test4060154(/* char* par */) 300 { 301 UErrorCode status = U_ZERO_ERROR; 302 UnicodeString rules; 303 304 rules += "< g, G < h, H < i, I < j, J"; 305 rules += " & H < "; 306 rules += (UChar)0x0131; 307 rules += ", "; 308 rules += (UChar)0x0130; 309 rules += ", i, I"; 310 311 RuleBasedCollator *c = NULL; 312 313 c = new RuleBasedCollator(rules, status); 314 315 if (c == NULL || U_FAILURE(status)) 316 { 317 errln("failure building collator."); 318 delete c; 319 return; 320 } 321 322 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 323 324 /* 325 String[] tertiary = { 326 "A", "<", "B", 327 "H", "<", "\u0131", 328 "H", "<", "I", 329 "\u0131", "<", "\u0130", 330 "\u0130", "<", "i", 331 "\u0130", ">", "H", 332 }; 333 */ 334 335 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = 336 { 337 {0x41, 0}, {0x3c, 0}, {0x42, 0}, 338 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, 339 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 340 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, 341 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, 342 {0x0130, 0}, {0x3e, 0}, {0x48, 0} 343 }; 344 345 c->setStrength(Collator::TERTIARY); 346 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); 347 348 /* 349 String[] secondary = { 350 "H", "<", "I", 351 "\u0131", "=", "\u0130", 352 }; 353 */ 354 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = 355 { 356 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 357 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} 358 }; 359 360 c->setStrength(Collator::PRIMARY); 361 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); 362 363 delete c; 364 } 365 366 // @bug 4062418 367 // 368 // Secondary/Tertiary comparison incorrect in French Secondary 369 // 370 void CollationRegressionTest::Test4062418(/* char* par */) 371 { 372 UErrorCode status = U_ZERO_ERROR; 373 374 RuleBasedCollator *c = NULL; 375 376 c = (RuleBasedCollator *) Collator::createInstance(Locale::getFrance(), status); 377 378 if (c == NULL || U_FAILURE(status)) 379 { 380 errln("Failed to create collator for Locale::getFrance()"); 381 delete c; 382 return; 383 } 384 385 c->setStrength(Collator::SECONDARY); 386 387 /* 388 String[] tests = { 389 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 390 }; 391 */ 392 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 393 { 394 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} 395 }; 396 397 compareArray(*c, tests, ARRAY_LENGTH(tests)); 398 399 delete c; 400 } 401 402 // @bug 4065540 403 // 404 // Collator::compare() method broken if either string contains spaces 405 // 406 void CollationRegressionTest::Test4065540(/* char* par */) 407 { 408 if (en_us->compare("abcd e", "abcd f") == 0) 409 { 410 errln("'abcd e' == 'abcd f'"); 411 } 412 } 413 414 // @bug 4066189 415 // 416 // Unicode characters need to be recursively decomposed to get the 417 // correct result. For example, 418 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 419 // 420 void CollationRegressionTest::Test4066189(/* char* par */) 421 { 422 static const UChar chars1[] = {0x1EB1, 0}; 423 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; 424 const UnicodeString test1(chars1); 425 const UnicodeString test2(chars2); 426 UErrorCode status = U_ZERO_ERROR; 427 428 // NOTE: The java code used en_us to create the 429 // CollationElementIterator's. I'm pretty sure that 430 // was wrong, so I've change the code to use c1 and c2 431 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); 432 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 433 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); 434 435 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); 436 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 437 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); 438 439 assertEqual(*i1, *i2); 440 441 delete i2; 442 delete c2; 443 delete i1; 444 delete c1; 445 } 446 447 // @bug 4066696 448 // 449 // French secondary collation checking at the end of compare iteration fails 450 // 451 void CollationRegressionTest::Test4066696(/* char* par */) 452 { 453 UErrorCode status = U_ZERO_ERROR; 454 RuleBasedCollator *c = NULL; 455 456 c = (RuleBasedCollator *)Collator::createInstance(Locale::getFrance(), status); 457 458 if (c == NULL || U_FAILURE(status)) 459 { 460 errln("Failure creating collator for Locale::getFrance()"); 461 delete c; 462 return; 463 } 464 465 c->setStrength(Collator::SECONDARY); 466 467 /* 468 String[] tests = { 469 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 470 }; 471 472 should be: 473 474 String[] tests = { 475 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute 476 }; 477 478 */ 479 480 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 481 { 482 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} 483 }; 484 485 compareArray(*c, tests, ARRAY_LENGTH(tests)); 486 487 delete c; 488 } 489 490 // @bug 4076676 491 // 492 // Bad canonicalization of same-class combining characters 493 // 494 void CollationRegressionTest::Test4076676(/* char* par */) 495 { 496 // These combining characters are all in the same class, so they should not 497 // be reordered, and they should compare as unequal. 498 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; 499 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; 500 501 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 502 c->setStrength(Collator::TERTIARY); 503 504 if (c->compare(s1,s2) == 0) 505 { 506 errln("Same-class combining chars were reordered"); 507 } 508 509 delete c; 510 } 511 512 // @bug 4079231 513 // 514 // RuleBasedCollator::operator==(NULL) throws NullPointerException 515 // 516 void CollationRegressionTest::Test4079231(/* char* par */) 517 { 518 // I don't think there's any way to write this test 519 // in C++. The following is equivalent to the Java, 520 // but doesn't compile 'cause NULL can't be converted 521 // to Collator& 522 // 523 // if (en_us->operator==(NULL)) 524 // { 525 // errln("en_us->operator==(NULL) returned TRUE"); 526 // } 527 528 /* 529 try { 530 if (en_us->equals(null)) { 531 errln("en_us->equals(null) returned true"); 532 } 533 } 534 catch (Exception e) { 535 errln("en_us->equals(null) threw " + e.toString()); 536 } 537 */ 538 } 539 540 // @bug 4078588 541 // 542 // RuleBasedCollator breaks on "< a < bb" rule 543 // 544 void CollationRegressionTest::Test4078588(/* char *par */) 545 { 546 UErrorCode status = U_ZERO_ERROR; 547 RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status); 548 549 if (rbc == NULL || U_FAILURE(status)) 550 { 551 errln("Failed to create RuleBasedCollator."); 552 delete rbc; 553 return; 554 } 555 556 Collator::EComparisonResult result = rbc->compare("a","bb"); 557 558 if (result != Collator::LESS) 559 { 560 errln((UnicodeString)"Compare(a,bb) returned " + (int)result 561 + (UnicodeString)"; expected -1"); 562 } 563 564 delete rbc; 565 } 566 567 // @bug 4081866 568 // 569 // Combining characters in different classes not reordered properly. 570 // 571 void CollationRegressionTest::Test4081866(/* char* par */) 572 { 573 // These combining characters are all in different classes, 574 // so they should be reordered and the strings should compare as equal. 575 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; 576 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; 577 578 UErrorCode status = U_ZERO_ERROR; 579 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 580 c->setStrength(Collator::TERTIARY); 581 582 // Now that the default collators are set to NO_DECOMPOSITION 583 // (as a result of fixing bug 4114077), we must set it explicitly 584 // when we're testing reordering behavior. -- lwerner, 5/5/98 585 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 586 587 if (c->compare(s1,s2) != 0) 588 { 589 errln("Combining chars were not reordered"); 590 } 591 592 delete c; 593 } 594 595 // @bug 4087241 596 // 597 // string comparison errors in Scandinavian collators 598 // 599 void CollationRegressionTest::Test4087241(/* char* par */) 600 { 601 UErrorCode status = U_ZERO_ERROR; 602 Locale da_DK("da", "DK"); 603 RuleBasedCollator *c = NULL; 604 605 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); 606 607 if (c == NULL || U_FAILURE(status)) 608 { 609 errln("Failed to create collator for da_DK locale"); 610 delete c; 611 return; 612 } 613 614 c->setStrength(Collator::SECONDARY); 615 616 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 617 { 618 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae 619 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ring 620 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut 621 }; 622 623 compareArray(*c, tests, ARRAY_LENGTH(tests)); 624 625 delete c; 626 } 627 628 // @bug 4087243 629 // 630 // CollationKey takes ignorable strings into account when it shouldn't 631 // 632 void CollationRegressionTest::Test4087243(/* char* par */) 633 { 634 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 635 c->setStrength(Collator::TERTIARY); 636 637 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 638 { 639 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A 640 }; 641 642 compareArray(*c, tests, ARRAY_LENGTH(tests)); 643 644 delete c; 645 } 646 647 // @bug 4092260 648 // 649 // Mu/micro conflict 650 // Micro symbol and greek lowercase letter Mu should sort identically 651 // 652 void CollationRegressionTest::Test4092260(/* char* par */) 653 { 654 UErrorCode status = U_ZERO_ERROR; 655 Locale el("el", ""); 656 Collator *c = NULL; 657 658 c = Collator::createInstance(el, status); 659 660 if (c == NULL || U_FAILURE(status)) 661 { 662 errln("Failed to create collator for el locale."); 663 delete c; 664 return; 665 } 666 667 // These now have tertiary differences in UCA 668 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 669 670 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 671 { 672 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} 673 }; 674 675 compareArray(*c, tests, ARRAY_LENGTH(tests)); 676 677 delete c; 678 } 679 680 // @bug 4095316 681 // 682 void CollationRegressionTest::Test4095316(/* char* par */) 683 { 684 UErrorCode status = U_ZERO_ERROR; 685 Locale el_GR("el", "GR"); 686 Collator *c = Collator::createInstance(el_GR, status); 687 688 if (c == NULL || U_FAILURE(status)) 689 { 690 errln("Failed to create collator for el_GR locale"); 691 delete c; 692 return; 693 } 694 // These now have tertiary differences in UCA 695 //c->setStrength(Collator::TERTIARY); 696 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 697 698 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 699 { 700 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} 701 }; 702 703 compareArray(*c, tests, ARRAY_LENGTH(tests)); 704 705 delete c; 706 } 707 708 // @bug 4101940 709 // 710 void CollationRegressionTest::Test4101940(/* char* par */) 711 { 712 UErrorCode status = U_ZERO_ERROR; 713 RuleBasedCollator *c = NULL; 714 UnicodeString rules = "< a < b"; 715 UnicodeString nothing = ""; 716 717 c = new RuleBasedCollator(rules, status); 718 719 if (c == NULL || U_FAILURE(status)) 720 { 721 errln("Failed to create RuleBasedCollator"); 722 delete c; 723 return; 724 } 725 726 CollationElementIterator *i = c->createCollationElementIterator(nothing); 727 i->reset(); 728 729 if (i->next(status) != CollationElementIterator::NULLORDER) 730 { 731 errln("next did not return NULLORDER"); 732 } 733 734 delete i; 735 delete c; 736 } 737 738 // @bug 4103436 739 // 740 // Collator::compare not handling spaces properly 741 // 742 void CollationRegressionTest::Test4103436(/* char* par */) 743 { 744 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 745 c->setStrength(Collator::TERTIARY); 746 747 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 748 { 749 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, 750 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} 751 }; 752 753 compareArray(*c, tests, ARRAY_LENGTH(tests)); 754 755 delete c; 756 } 757 758 // @bug 4114076 759 // 760 // Collation not Unicode conformant with Hangul syllables 761 // 762 void CollationRegressionTest::Test4114076(/* char* par */) 763 { 764 UErrorCode status = U_ZERO_ERROR; 765 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 766 c->setStrength(Collator::TERTIARY); 767 768 // 769 // With Canonical decomposition, Hangul syllables should get decomposed 770 // into Jamo, but Jamo characters should not be decomposed into 771 // conjoining Jamo 772 // 773 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 774 { 775 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} 776 }; 777 778 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 779 compareArray(*c, test1, ARRAY_LENGTH(test1)); 780 781 // From UTR #15: 782 // *In earlier versions of Unicode, jamo characters like ksf 783 // had compatibility mappings to kf + sf. These mappings were 784 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) 785 // That is, the following test is obsolete as of 2.1.9 786 787 //obsolete- // With Full decomposition, it should go all the way down to 788 //obsolete- // conjoining Jamo characters. 789 //obsolete- // 790 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 791 //obsolete- { 792 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} 793 //obsolete- }; 794 //obsolete- 795 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); 796 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); 797 798 delete c; 799 } 800 801 802 // @bug 4124632 803 // 804 // Collator::getCollationKey was hanging on certain character sequences 805 // 806 void CollationRegressionTest::Test4124632(/* char* par */) 807 { 808 UErrorCode status = U_ZERO_ERROR; 809 Collator *coll = NULL; 810 811 coll = Collator::createInstance(Locale::getJapan(), status); 812 813 if (coll == NULL || U_FAILURE(status)) 814 { 815 errln("Failed to create collator for Locale::JAPAN"); 816 delete coll; 817 return; 818 } 819 820 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; 821 CollationKey key; 822 823 coll->getCollationKey(test, key, status); 824 825 if (key.isBogus() || U_FAILURE(status)) 826 { 827 errln("CollationKey creation failed."); 828 } 829 830 delete coll; 831 } 832 833 // @bug 4132736 834 // 835 // sort order of french words with multiple accents has errors 836 // 837 void CollationRegressionTest::Test4132736(/* char* par */) 838 { 839 UErrorCode status = U_ZERO_ERROR; 840 841 Collator *c = NULL; 842 843 c = Collator::createInstance(Locale::getFrance(), status); 844 c->setStrength(Collator::TERTIARY); 845 846 if (c == NULL || U_FAILURE(status)) 847 { 848 errln("Failed to create a collator for Locale::getFrance()"); 849 delete c; 850 return; 851 } 852 853 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 854 { 855 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, 856 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} 857 }; 858 859 compareArray(*c, test1, ARRAY_LENGTH(test1)); 860 861 delete c; 862 } 863 864 // @bug 4133509 865 // 866 // The sorting using java.text.CollationKey is not in the exact order 867 // 868 void CollationRegressionTest::Test4133509(/* char* par */) 869 { 870 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 871 { 872 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, 873 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, 874 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} 875 }; 876 877 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); 878 } 879 880 // @bug 4114077 881 // 882 // Collation with decomposition off doesn't work for Europe 883 // 884 void CollationRegressionTest::Test4114077(/* char* par */) 885 { 886 // Ensure that we get the same results with decomposition off 887 // as we do with it on.... 888 889 UErrorCode status = U_ZERO_ERROR; 890 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 891 c->setStrength(Collator::TERTIARY); 892 893 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 894 { 895 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent 896 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, 897 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, 898 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute 899 // -> a, ring, acute 900 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal 901 }; 902 903 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 904 compareArray(*c, test1, ARRAY_LENGTH(test1)); 905 906 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 907 { 908 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal 909 }; 910 911 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 912 compareArray(*c, test2, ARRAY_LENGTH(test2)); 913 914 delete c; 915 } 916 917 // @bug 4141640 918 // 919 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 920 // 921 void CollationRegressionTest::Test4141640(/* char* par */) 922 { 923 // 924 // Rather than just creating a Swedish collator, we might as well 925 // try to instantiate one for every locale available on the system 926 // in order to prevent this sort of bug from cropping up in the future 927 // 928 UErrorCode status = U_ZERO_ERROR; 929 int32_t i, localeCount; 930 const Locale *locales = Locale::getAvailableLocales(localeCount); 931 932 for (i = 0; i < localeCount; i += 1) 933 { 934 Collator *c = NULL; 935 936 status = U_ZERO_ERROR; 937 c = Collator::createInstance(locales[i], status); 938 939 if (c == NULL || U_FAILURE(status)) 940 { 941 UnicodeString msg, localeName; 942 943 msg += "Could not create collator for locale "; 944 msg += locales[i].getName(); 945 946 errln(msg); 947 } 948 949 delete c; 950 } 951 } 952 953 // @bug 4139572 954 // 955 // getCollationKey throws exception for spanish text 956 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 957 // 958 void CollationRegressionTest::Test4139572(/* char* par */) 959 { 960 // 961 // Code pasted straight from the bug report 962 // (and then translated to C++ ;-) 963 // 964 // create spanish locale and collator 965 UErrorCode status = U_ZERO_ERROR; 966 Locale l("es", "es"); 967 Collator *col = NULL; 968 969 col = Collator::createInstance(l, status); 970 971 if (col == NULL || U_FAILURE(status)) 972 { 973 errln("Failed to create a collator for es_es locale."); 974 delete col; 975 return; 976 } 977 978 CollationKey key; 979 980 // this spanish phrase kills it! 981 col->getCollationKey("Nombre De Objeto", key, status); 982 983 if (key.isBogus() || U_FAILURE(status)) 984 { 985 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); 986 } 987 988 delete col; 989 } 990 /* HSYS : RuleBasedCollator::compare() performance enhancements 991 compare() does not create CollationElementIterator() anymore.*/ 992 993 class My4146160Collator : public RuleBasedCollator 994 { 995 public: 996 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); 997 ~My4146160Collator(); 998 999 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; 1000 1001 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; 1002 1003 static int32_t count; 1004 }; 1005 1006 int32_t My4146160Collator::count = 0; 1007 1008 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) 1009 : RuleBasedCollator(rbc.getRules(), status) 1010 { 1011 } 1012 1013 My4146160Collator::~My4146160Collator() 1014 { 1015 } 1016 1017 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const 1018 { 1019 count += 1; 1020 return RuleBasedCollator::createCollationElementIterator(text); 1021 } 1022 1023 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const 1024 { 1025 count += 1; 1026 return RuleBasedCollator::createCollationElementIterator(text); 1027 } 1028 1029 // @bug 4146160 1030 // 1031 // RuleBasedCollator doesn't use createCollationElementIterator internally 1032 // 1033 void CollationRegressionTest::Test4146160(/* char* par */) 1034 { 1035 #if 0 1036 // 1037 // Use a custom collator class whose createCollationElementIterator 1038 // methods increment a count.... 1039 // 1040 UErrorCode status = U_ZERO_ERROR; 1041 CollationKey key; 1042 1043 My4146160Collator::count = 0; 1044 My4146160Collator *mc = NULL; 1045 1046 mc = new My4146160Collator(*en_us, status); 1047 1048 if (mc == NULL || U_FAILURE(status)) 1049 { 1050 errln("Failed to create a My4146160Collator."); 1051 delete mc; 1052 return; 1053 } 1054 1055 mc->getCollationKey("1", key, status); 1056 1057 if (key.isBogus() || U_FAILURE(status)) 1058 { 1059 errln("Failure to get a CollationKey from a My4146160Collator."); 1060 delete mc; 1061 return; 1062 } 1063 1064 if (My4146160Collator::count < 1) 1065 { 1066 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); 1067 } 1068 1069 My4146160Collator::count = 0; 1070 mc->compare("1", "2"); 1071 1072 if (My4146160Collator::count < 1) 1073 { 1074 errln("My4146160Collator::createtCollationElementIterator not called for compare"); 1075 } 1076 1077 delete mc; 1078 #endif 1079 } 1080 1081 // Ticket 7189 1082 // 1083 // nextSortKeyPart incorrect for EO_S1 collation 1084 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t keyBufLen, UErrorCode& status) { 1085 UCharIterator uiter; 1086 uint32_t state[2] = { 0, 0 }; 1087 int32_t keyLen; 1088 int32_t count = 8; 1089 1090 uiter_setString(&uiter, text, len); 1091 keyLen = 0; 1092 while (TRUE) { 1093 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); 1094 if (U_FAILURE(status)) { 1095 return -1; 1096 } 1097 if (keyPartLen == 0) { 1098 break; 1099 } 1100 keyLen += keyPartLen; 1101 } 1102 return keyLen; 1103 } 1104 1105 void CollationRegressionTest::TestT7189() { 1106 UErrorCode status = U_ZERO_ERROR; 1107 UCollator *coll; 1108 uint32_t i; 1109 1110 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { 1111 // "Achter De Hoven" 1112 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, 1113 // "ABC" 1114 { 0x41, 0x42, 0x43, 0x00 }, 1115 // "HELLO world!" 1116 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } 1117 }; 1118 1119 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { 1120 // "Achter de Hoven" 1121 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, 1122 // "abc" 1123 { 0x61, 0x62, 0x63, 0x00 }, 1124 // "hello world!" 1125 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } 1126 }; 1127 1128 // Open the collator 1129 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); 1130 if (U_FAILURE(status)) { 1131 errln("Failed to create a collator for short string EO_S1"); 1132 return; 1133 } 1134 1135 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) { 1136 uint8_t key1[100], key2[100]; 1137 int32_t len1, len2; 1138 1139 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); 1140 if (U_FAILURE(status)) { 1141 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); 1142 break; 1143 } 1144 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); 1145 if (U_FAILURE(status)) { 1146 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); 1147 break; 1148 } 1149 1150 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { 1151 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); 1152 } else { 1153 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " 1154 + TestUtility::hex(key2, len2)); 1155 } 1156 } 1157 ucol_close(coll); 1158 } 1159 1160 void CollationRegressionTest::compareArray(Collator &c, 1161 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], 1162 int32_t testCount) 1163 { 1164 int32_t i; 1165 Collator::EComparisonResult expectedResult = Collator::EQUAL; 1166 1167 for (i = 0; i < testCount; i += 3) 1168 { 1169 UnicodeString source(tests[i]); 1170 UnicodeString comparison(tests[i + 1]); 1171 UnicodeString target(tests[i + 2]); 1172 1173 if (comparison == "<") 1174 { 1175 expectedResult = Collator::LESS; 1176 } 1177 else if (comparison == ">") 1178 { 1179 expectedResult = Collator::GREATER; 1180 } 1181 else if (comparison == "=") 1182 { 1183 expectedResult = Collator::EQUAL; 1184 } 1185 else 1186 { 1187 UnicodeString bogus1("Bogus comparison string \""); 1188 UnicodeString bogus2("\""); 1189 errln(bogus1 + comparison + bogus2); 1190 } 1191 1192 Collator::EComparisonResult compareResult = c.compare(source, target); 1193 1194 CollationKey sourceKey, targetKey; 1195 UErrorCode status = U_ZERO_ERROR; 1196 1197 c.getCollationKey(source, sourceKey, status); 1198 1199 if (U_FAILURE(status)) 1200 { 1201 errln("Couldn't get collationKey for source"); 1202 continue; 1203 } 1204 1205 c.getCollationKey(target, targetKey, status); 1206 1207 if (U_FAILURE(status)) 1208 { 1209 errln("Couldn't get collationKey for target"); 1210 continue; 1211 } 1212 1213 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); 1214 1215 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); 1216 1217 } 1218 } 1219 1220 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) 1221 { 1222 int32_t c1, c2, count = 0; 1223 UErrorCode status = U_ZERO_ERROR; 1224 1225 do 1226 { 1227 c1 = i1.next(status); 1228 c2 = i2.next(status); 1229 1230 if (c1 != c2) 1231 { 1232 UnicodeString msg, msg1(" "); 1233 1234 msg += msg1 + count; 1235 msg += ": strength(0x"; 1236 appendHex(c1, 8, msg); 1237 msg += ") != strength(0x"; 1238 appendHex(c2, 8, msg); 1239 msg += ")"; 1240 1241 errln(msg); 1242 break; 1243 } 1244 1245 count += 1; 1246 } 1247 while (c1 != CollationElementIterator::NULLORDER); 1248 } 1249 1250 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) 1251 { 1252 if (exec) 1253 { 1254 logln("Collation Regression Tests: "); 1255 } 1256 1257 if(en_us) { 1258 switch (index) 1259 { 1260 case 0: name = "Test4048446"; if (exec) Test4048446(/* par */); break; 1261 case 1: name = "Test4051866"; if (exec) Test4051866(/* par */); break; 1262 case 2: name = "Test4053636"; if (exec) Test4053636(/* par */); break; 1263 case 3: name = "Test4054238"; if (exec) Test4054238(/* par */); break; 1264 case 4: name = "Test4054734"; if (exec) Test4054734(/* par */); break; 1265 case 5: name = "Test4054736"; if (exec) Test4054736(/* par */); break; 1266 case 6: name = "Test4058613"; if (exec) Test4058613(/* par */); break; 1267 case 7: name = "Test4059820"; if (exec) Test4059820(/* par */); break; 1268 case 8: name = "Test4060154"; if (exec) Test4060154(/* par */); break; 1269 case 9: name = "Test4062418"; if (exec) Test4062418(/* par */); break; 1270 case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break; 1271 case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break; 1272 case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break; 1273 case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break; 1274 case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break; 1275 case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break; 1276 case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break; 1277 case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break; 1278 case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break; 1279 case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break; 1280 case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break; 1281 case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break; 1282 case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break; 1283 case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break; 1284 case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break; 1285 case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break; 1286 case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break; 1287 case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break; 1288 case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break; 1289 case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break; 1290 case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break; 1291 case 31: name = "TestT7189"; if (exec) TestT7189(); break; 1292 default: name = ""; break; 1293 } 1294 } else { 1295 dataerrln("Class collator not instantiated"); 1296 name = ""; 1297 } 1298 } 1299 1300 #endif /* #if !UCONFIG_NO_COLLATION */ 1301