1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_COLLATION 10 11 #include "unicode/coll.h" 12 #include "unicode/localpointer.h" 13 #include "unicode/tblcoll.h" 14 #include "unicode/unistr.h" 15 #include "unicode/sortkey.h" 16 #include "regcoll.h" 17 #include "sfwdchit.h" 18 #include "testutil.h" 19 #include "cmemory.h" 20 21 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0])) 22 23 CollationRegressionTest::CollationRegressionTest() 24 { 25 UErrorCode status = U_ZERO_ERROR; 26 27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); 28 if(U_FAILURE(status)) { 29 delete en_us; 30 en_us = 0; 31 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 32 return; 33 } 34 } 35 36 CollationRegressionTest::~CollationRegressionTest() 37 { 38 delete en_us; 39 } 40 41 42 // @bug 4048446 43 // 44 // CollationElementIterator.reset() doesn't work 45 // 46 void CollationRegressionTest::Test4048446(/* char* par */) 47 { 48 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); 51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); 52 UErrorCode status = U_ZERO_ERROR; 53 54 if (i1 == NULL|| i2 == NULL) 55 { 56 errln("Could not create CollationElementIterator's"); 57 delete i1; 58 delete i2; 59 return; 60 } 61 62 while (i1->next(status) != CollationElementIterator::NULLORDER) 63 { 64 if (U_FAILURE(status)) 65 { 66 errln("error calling next()"); 67 68 delete i1; 69 delete i2; 70 return; 71 } 72 } 73 74 i1->reset(); 75 76 assertEqual(*i1, *i2); 77 78 delete i1; 79 delete i2; 80 } 81 82 // @bug 4051866 83 // 84 // Collator -> rules -> Collator round-trip broken for expanding characters 85 // 86 void CollationRegressionTest::Test4051866(/* char* par */) 87 { 88 UnicodeString rules; 89 UErrorCode status = U_ZERO_ERROR; 90 91 rules += "&n < o "; 92 rules += "& oe ,o"; 93 rules += (UChar)0x3080; 94 rules += "& oe ,"; 95 rules += (UChar)0x1530; 96 rules += " ,O"; 97 rules += "& OE ,O"; 98 rules += (UChar)0x3080; 99 rules += "& OE ,"; 100 rules += (UChar)0x1520; 101 rules += "< p ,P"; 102 103 // Build a collator containing expanding characters 104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status); 105 if (U_FAILURE(status)) { 106 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status)); 107 return; 108 } 109 110 // Build another using the rules from the first 111 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status); 112 if (U_FAILURE(status)) { 113 errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status)); 114 return; 115 } 116 117 // Make sure they're the same 118 if (!(c1->getRules() == c2->getRules())) 119 { 120 errln("Rules are not equal"); 121 } 122 } 123 124 // @bug 4053636 125 // 126 // Collator thinks "black-bird" == "black" 127 // 128 void CollationRegressionTest::Test4053636(/* char* par */) 129 { 130 if (en_us->equals("black_bird", "black")) 131 { 132 errln("black-bird == black"); 133 } 134 } 135 136 // @bug 4054238 137 // 138 // CollationElementIterator will not work correctly if the associated 139 // Collator object's mode is changed 140 // 141 void CollationRegressionTest::Test4054238(/* char* par */) 142 { 143 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; 144 const UnicodeString test3(chars3); 145 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 146 147 // NOTE: The Java code uses en_us to create the CollationElementIterators 148 // but I'm pretty sure that's wrong, so I've changed this to use c. 149 UErrorCode status = U_ZERO_ERROR; 150 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 151 CollationElementIterator *i1 = c->createCollationElementIterator(test3); 152 delete i1; 153 delete c; 154 } 155 156 // @bug 4054734 157 // 158 // Collator::IDENTICAL documented but not implemented 159 // 160 void CollationRegressionTest::Test4054734(/* char* par */) 161 { 162 /* 163 Here's the original Java: 164 165 String[] decomp = { 166 "\u0001", "<", "\u0002", 167 "\u0001", "=", "\u0001", 168 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 169 "\u00C0", "=", "A\u0300" // Decomp should make these equal 170 }; 171 172 String[] nodecomp = { 173 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 174 }; 175 */ 176 177 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = 178 { 179 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, 180 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, 181 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, 182 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} 183 }; 184 185 186 UErrorCode status = U_ZERO_ERROR; 187 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 188 189 c->setStrength(Collator::IDENTICAL); 190 191 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 192 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); 193 194 delete c; 195 } 196 197 // @bug 4054736 198 // 199 // Full Decomposition mode not implemented 200 // 201 void CollationRegressionTest::Test4054736(/* char* par */) 202 { 203 UErrorCode status = U_ZERO_ERROR; 204 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 205 206 c->setStrength(Collator::SECONDARY); 207 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 208 209 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 210 { 211 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed 212 }; 213 214 compareArray(*c, tests, ARRAY_LENGTH(tests)); 215 216 delete c; 217 } 218 219 // @bug 4058613 220 // 221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean 222 // 223 void CollationRegressionTest::Test4058613(/* char* par */) 224 { 225 // Creating a default collator doesn't work when Korean is the default 226 // locale 227 228 Locale oldDefault = Locale::getDefault(); 229 UErrorCode status = U_ZERO_ERROR; 230 231 Locale::setDefault(Locale::getKorean(), status); 232 233 if (U_FAILURE(status)) 234 { 235 errln("Could not set default locale to Locale::KOREAN"); 236 return; 237 } 238 239 Collator *c = NULL; 240 241 c = Collator::createInstance("en_US", status); 242 243 if (c == NULL || U_FAILURE(status)) 244 { 245 errln("Could not create a Korean collator"); 246 Locale::setDefault(oldDefault, status); 247 delete c; 248 return; 249 } 250 251 // Since the fix to this bug was to turn off decomposition for Korean collators, 252 // ensure that's what we got 253 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) 254 { 255 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); 256 } 257 258 delete c; 259 260 Locale::setDefault(oldDefault, status); 261 } 262 263 // @bug 4059820 264 // 265 // RuleBasedCollator.getRules does not return the exact pattern as input 266 // for expanding character sequences 267 // 268 void CollationRegressionTest::Test4059820(/* char* par */) 269 { 270 UErrorCode status = U_ZERO_ERROR; 271 272 RuleBasedCollator *c = NULL; 273 UnicodeString rules = "&9 < a < b , c/a < d < z"; 274 275 c = new RuleBasedCollator(rules, status); 276 277 if (c == NULL || U_FAILURE(status)) 278 { 279 errln("Failure building a collator."); 280 delete c; 281 return; 282 } 283 284 if ( c->getRules().indexOf("c/a") == -1) 285 { 286 errln("returned rules do not contain 'c/a'"); 287 } 288 289 delete c; 290 } 291 292 // @bug 4060154 293 // 294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 295 // 296 void CollationRegressionTest::Test4060154(/* char* par */) 297 { 298 UErrorCode status = U_ZERO_ERROR; 299 UnicodeString rules; 300 301 rules += "&f < g, G < h, H < i, I < j, J"; 302 rules += " & H < "; 303 rules += (UChar)0x0131; 304 rules += ", "; 305 rules += (UChar)0x0130; 306 rules += ", i, I"; 307 308 RuleBasedCollator *c = NULL; 309 310 c = new RuleBasedCollator(rules, status); 311 312 if (c == NULL || U_FAILURE(status)) 313 { 314 errln("failure building collator."); 315 delete c; 316 return; 317 } 318 319 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 320 321 /* 322 String[] tertiary = { 323 "A", "<", "B", 324 "H", "<", "\u0131", 325 "H", "<", "I", 326 "\u0131", "<", "\u0130", 327 "\u0130", "<", "i", 328 "\u0130", ">", "H", 329 }; 330 */ 331 332 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = 333 { 334 {0x41, 0}, {0x3c, 0}, {0x42, 0}, 335 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, 336 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 337 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, 338 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, 339 {0x0130, 0}, {0x3e, 0}, {0x48, 0} 340 }; 341 342 c->setStrength(Collator::TERTIARY); 343 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); 344 345 /* 346 String[] secondary = { 347 "H", "<", "I", 348 "\u0131", "=", "\u0130", 349 }; 350 */ 351 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = 352 { 353 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 354 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} 355 }; 356 357 c->setStrength(Collator::PRIMARY); 358 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); 359 360 delete c; 361 } 362 363 // @bug 4062418 364 // 365 // Secondary/Tertiary comparison incorrect in French Secondary 366 // 367 void CollationRegressionTest::Test4062418(/* char* par */) 368 { 369 UErrorCode status = U_ZERO_ERROR; 370 371 RuleBasedCollator *c = NULL; 372 373 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status); 374 375 if (c == NULL || U_FAILURE(status)) 376 { 377 errln("Failed to create collator for Locale::getCanadaFrench()"); 378 delete c; 379 return; 380 } 381 382 c->setStrength(Collator::SECONDARY); 383 384 /* 385 String[] tests = { 386 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 387 }; 388 */ 389 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 390 { 391 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} 392 }; 393 394 compareArray(*c, tests, ARRAY_LENGTH(tests)); 395 396 delete c; 397 } 398 399 // @bug 4065540 400 // 401 // Collator::compare() method broken if either string contains spaces 402 // 403 void CollationRegressionTest::Test4065540(/* char* par */) 404 { 405 if (en_us->compare("abcd e", "abcd f") == 0) 406 { 407 errln("'abcd e' == 'abcd f'"); 408 } 409 } 410 411 // @bug 4066189 412 // 413 // Unicode characters need to be recursively decomposed to get the 414 // correct result. For example, 415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 416 // 417 void CollationRegressionTest::Test4066189(/* char* par */) 418 { 419 static const UChar chars1[] = {0x1EB1, 0}; 420 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; 421 const UnicodeString test1(chars1); 422 const UnicodeString test2(chars2); 423 UErrorCode status = U_ZERO_ERROR; 424 425 // NOTE: The java code used en_us to create the 426 // CollationElementIterator's. I'm pretty sure that 427 // was wrong, so I've change the code to use c1 and c2 428 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); 429 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 430 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); 431 432 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); 433 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 434 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); 435 436 assertEqual(*i1, *i2); 437 438 delete i2; 439 delete c2; 440 delete i1; 441 delete c1; 442 } 443 444 // @bug 4066696 445 // 446 // French secondary collation checking at the end of compare iteration fails 447 // 448 void CollationRegressionTest::Test4066696(/* char* par */) 449 { 450 UErrorCode status = U_ZERO_ERROR; 451 RuleBasedCollator *c = NULL; 452 453 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); 454 455 if (c == NULL || U_FAILURE(status)) 456 { 457 errln("Failure creating collator for Locale::getCanadaFrench()"); 458 delete c; 459 return; 460 } 461 462 c->setStrength(Collator::SECONDARY); 463 464 /* 465 String[] tests = { 466 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 467 }; 468 469 should be: 470 471 String[] tests = { 472 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute 473 }; 474 475 */ 476 477 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 478 { 479 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} 480 }; 481 482 compareArray(*c, tests, ARRAY_LENGTH(tests)); 483 484 delete c; 485 } 486 487 // @bug 4076676 488 // 489 // Bad canonicalization of same-class combining characters 490 // 491 void CollationRegressionTest::Test4076676(/* char* par */) 492 { 493 // These combining characters are all in the same class, so they should not 494 // be reordered, and they should compare as unequal. 495 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; 496 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; 497 498 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 499 c->setStrength(Collator::TERTIARY); 500 501 if (c->compare(s1,s2) == 0) 502 { 503 errln("Same-class combining chars were reordered"); 504 } 505 506 delete c; 507 } 508 509 // @bug 4079231 510 // 511 // RuleBasedCollator::operator==(NULL) throws NullPointerException 512 // 513 void CollationRegressionTest::Test4079231(/* char* par */) 514 { 515 // I don't think there's any way to write this test 516 // in C++. The following is equivalent to the Java, 517 // but doesn't compile 'cause NULL can't be converted 518 // to Collator& 519 // 520 // if (en_us->operator==(NULL)) 521 // { 522 // errln("en_us->operator==(NULL) returned TRUE"); 523 // } 524 525 /* 526 try { 527 if (en_us->equals(null)) { 528 errln("en_us->equals(null) returned true"); 529 } 530 } 531 catch (Exception e) { 532 errln("en_us->equals(null) threw " + e.toString()); 533 } 534 */ 535 } 536 537 // @bug 4078588 538 // 539 // RuleBasedCollator breaks on "< a < bb" rule 540 // 541 void CollationRegressionTest::Test4078588(/* char *par */) 542 { 543 UErrorCode status = U_ZERO_ERROR; 544 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); 545 546 if (rbc == NULL || U_FAILURE(status)) 547 { 548 errln("Failed to create RuleBasedCollator."); 549 delete rbc; 550 return; 551 } 552 553 Collator::EComparisonResult result = rbc->compare("a","bb"); 554 555 if (result != Collator::LESS) 556 { 557 errln((UnicodeString)"Compare(a,bb) returned " + (int)result 558 + (UnicodeString)"; expected -1"); 559 } 560 561 delete rbc; 562 } 563 564 // @bug 4081866 565 // 566 // Combining characters in different classes not reordered properly. 567 // 568 void CollationRegressionTest::Test4081866(/* char* par */) 569 { 570 // These combining characters are all in different classes, 571 // so they should be reordered and the strings should compare as equal. 572 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; 573 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; 574 575 UErrorCode status = U_ZERO_ERROR; 576 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 577 c->setStrength(Collator::TERTIARY); 578 579 // Now that the default collators are set to NO_DECOMPOSITION 580 // (as a result of fixing bug 4114077), we must set it explicitly 581 // when we're testing reordering behavior. -- lwerner, 5/5/98 582 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 583 584 if (c->compare(s1,s2) != 0) 585 { 586 errln("Combining chars were not reordered"); 587 } 588 589 delete c; 590 } 591 592 // @bug 4087241 593 // 594 // string comparison errors in Scandinavian collators 595 // 596 void CollationRegressionTest::Test4087241(/* char* par */) 597 { 598 UErrorCode status = U_ZERO_ERROR; 599 Locale da_DK("da", "DK"); 600 RuleBasedCollator *c = NULL; 601 602 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); 603 604 if (c == NULL || U_FAILURE(status)) 605 { 606 errln("Failed to create collator for da_DK locale"); 607 delete c; 608 return; 609 } 610 611 c->setStrength(Collator::SECONDARY); 612 613 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 614 { 615 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae 616 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring 617 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut 618 }; 619 620 compareArray(*c, tests, ARRAY_LENGTH(tests)); 621 622 delete c; 623 } 624 625 // @bug 4087243 626 // 627 // CollationKey takes ignorable strings into account when it shouldn't 628 // 629 void CollationRegressionTest::Test4087243(/* char* par */) 630 { 631 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 632 c->setStrength(Collator::TERTIARY); 633 634 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 635 { 636 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A 637 }; 638 639 compareArray(*c, tests, ARRAY_LENGTH(tests)); 640 641 delete c; 642 } 643 644 // @bug 4092260 645 // 646 // Mu/micro conflict 647 // Micro symbol and greek lowercase letter Mu should sort identically 648 // 649 void CollationRegressionTest::Test4092260(/* char* par */) 650 { 651 UErrorCode status = U_ZERO_ERROR; 652 Locale el("el", ""); 653 Collator *c = NULL; 654 655 c = Collator::createInstance(el, status); 656 657 if (c == NULL || U_FAILURE(status)) 658 { 659 errln("Failed to create collator for el locale."); 660 delete c; 661 return; 662 } 663 664 // These now have tertiary differences in UCA 665 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 666 667 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 668 { 669 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} 670 }; 671 672 compareArray(*c, tests, ARRAY_LENGTH(tests)); 673 674 delete c; 675 } 676 677 // @bug 4095316 678 // 679 void CollationRegressionTest::Test4095316(/* char* par */) 680 { 681 UErrorCode status = U_ZERO_ERROR; 682 Locale el_GR("el", "GR"); 683 Collator *c = Collator::createInstance(el_GR, status); 684 685 if (c == NULL || U_FAILURE(status)) 686 { 687 errln("Failed to create collator for el_GR locale"); 688 delete c; 689 return; 690 } 691 // These now have tertiary differences in UCA 692 //c->setStrength(Collator::TERTIARY); 693 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 694 695 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 696 { 697 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} 698 }; 699 700 compareArray(*c, tests, ARRAY_LENGTH(tests)); 701 702 delete c; 703 } 704 705 // @bug 4101940 706 // 707 void CollationRegressionTest::Test4101940(/* char* par */) 708 { 709 UErrorCode status = U_ZERO_ERROR; 710 RuleBasedCollator *c = NULL; 711 UnicodeString rules = "&9 < a < b"; 712 UnicodeString nothing = ""; 713 714 c = new RuleBasedCollator(rules, status); 715 716 if (c == NULL || U_FAILURE(status)) 717 { 718 errln("Failed to create RuleBasedCollator"); 719 delete c; 720 return; 721 } 722 723 CollationElementIterator *i = c->createCollationElementIterator(nothing); 724 i->reset(); 725 726 if (i->next(status) != CollationElementIterator::NULLORDER) 727 { 728 errln("next did not return NULLORDER"); 729 } 730 731 delete i; 732 delete c; 733 } 734 735 // @bug 4103436 736 // 737 // Collator::compare not handling spaces properly 738 // 739 void CollationRegressionTest::Test4103436(/* char* par */) 740 { 741 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 742 c->setStrength(Collator::TERTIARY); 743 744 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 745 { 746 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, 747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} 748 }; 749 750 compareArray(*c, tests, ARRAY_LENGTH(tests)); 751 752 delete c; 753 } 754 755 // @bug 4114076 756 // 757 // Collation not Unicode conformant with Hangul syllables 758 // 759 void CollationRegressionTest::Test4114076(/* char* par */) 760 { 761 UErrorCode status = U_ZERO_ERROR; 762 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 763 c->setStrength(Collator::TERTIARY); 764 765 // 766 // With Canonical decomposition, Hangul syllables should get decomposed 767 // into Jamo, but Jamo characters should not be decomposed into 768 // conjoining Jamo 769 // 770 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 771 { 772 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} 773 }; 774 775 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 776 compareArray(*c, test1, ARRAY_LENGTH(test1)); 777 778 // From UTR #15: 779 // *In earlier versions of Unicode, jamo characters like ksf 780 // had compatibility mappings to kf + sf. These mappings were 781 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) 782 // That is, the following test is obsolete as of 2.1.9 783 784 //obsolete- // With Full decomposition, it should go all the way down to 785 //obsolete- // conjoining Jamo characters. 786 //obsolete- // 787 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 788 //obsolete- { 789 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} 790 //obsolete- }; 791 //obsolete- 792 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); 793 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); 794 795 delete c; 796 } 797 798 799 // @bug 4124632 800 // 801 // Collator::getCollationKey was hanging on certain character sequences 802 // 803 void CollationRegressionTest::Test4124632(/* char* par */) 804 { 805 UErrorCode status = U_ZERO_ERROR; 806 Collator *coll = NULL; 807 808 coll = Collator::createInstance(Locale::getJapan(), status); 809 810 if (coll == NULL || U_FAILURE(status)) 811 { 812 errln("Failed to create collator for Locale::JAPAN"); 813 delete coll; 814 return; 815 } 816 817 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; 818 CollationKey key; 819 820 coll->getCollationKey(test, key, status); 821 822 if (key.isBogus() || U_FAILURE(status)) 823 { 824 errln("CollationKey creation failed."); 825 } 826 827 delete coll; 828 } 829 830 // @bug 4132736 831 // 832 // sort order of french words with multiple accents has errors 833 // 834 void CollationRegressionTest::Test4132736(/* char* par */) 835 { 836 UErrorCode status = U_ZERO_ERROR; 837 838 Collator *c = NULL; 839 840 c = Collator::createInstance(Locale::getCanadaFrench(), status); 841 c->setStrength(Collator::TERTIARY); 842 843 if (c == NULL || U_FAILURE(status)) 844 { 845 errln("Failed to create a collator for Locale::getCanadaFrench()"); 846 delete c; 847 return; 848 } 849 850 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 851 { 852 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, 853 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} 854 }; 855 856 compareArray(*c, test1, ARRAY_LENGTH(test1)); 857 858 delete c; 859 } 860 861 // @bug 4133509 862 // 863 // The sorting using java.text.CollationKey is not in the exact order 864 // 865 void CollationRegressionTest::Test4133509(/* char* par */) 866 { 867 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 868 { 869 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, 870 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, 871 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} 872 }; 873 874 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); 875 } 876 877 // @bug 4114077 878 // 879 // Collation with decomposition off doesn't work for Europe 880 // 881 void CollationRegressionTest::Test4114077(/* char* par */) 882 { 883 // Ensure that we get the same results with decomposition off 884 // as we do with it on.... 885 886 UErrorCode status = U_ZERO_ERROR; 887 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 888 c->setStrength(Collator::TERTIARY); 889 890 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 891 { 892 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent 893 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, 894 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, 895 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute 896 // -> a, ring, acute 897 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal 898 }; 899 900 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 901 compareArray(*c, test1, ARRAY_LENGTH(test1)); 902 903 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 904 { 905 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal 906 }; 907 908 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 909 compareArray(*c, test2, ARRAY_LENGTH(test2)); 910 911 delete c; 912 } 913 914 // @bug 4141640 915 // 916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 917 // 918 void CollationRegressionTest::Test4141640(/* char* par */) 919 { 920 // 921 // Rather than just creating a Swedish collator, we might as well 922 // try to instantiate one for every locale available on the system 923 // in order to prevent this sort of bug from cropping up in the future 924 // 925 UErrorCode status = U_ZERO_ERROR; 926 int32_t i, localeCount; 927 const Locale *locales = Locale::getAvailableLocales(localeCount); 928 929 for (i = 0; i < localeCount; i += 1) 930 { 931 Collator *c = NULL; 932 933 status = U_ZERO_ERROR; 934 c = Collator::createInstance(locales[i], status); 935 936 if (c == NULL || U_FAILURE(status)) 937 { 938 UnicodeString msg, localeName; 939 940 msg += "Could not create collator for locale "; 941 msg += locales[i].getName(); 942 943 errln(msg); 944 } 945 946 delete c; 947 } 948 } 949 950 // @bug 4139572 951 // 952 // getCollationKey throws exception for spanish text 953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 954 // 955 void CollationRegressionTest::Test4139572(/* char* par */) 956 { 957 // 958 // Code pasted straight from the bug report 959 // (and then translated to C++ ;-) 960 // 961 // create spanish locale and collator 962 UErrorCode status = U_ZERO_ERROR; 963 Locale l("es", "es"); 964 Collator *col = NULL; 965 966 col = Collator::createInstance(l, status); 967 968 if (col == NULL || U_FAILURE(status)) 969 { 970 errln("Failed to create a collator for es_es locale."); 971 delete col; 972 return; 973 } 974 975 CollationKey key; 976 977 // this spanish phrase kills it! 978 col->getCollationKey("Nombre De Objeto", key, status); 979 980 if (key.isBogus() || U_FAILURE(status)) 981 { 982 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); 983 } 984 985 delete col; 986 } 987 /* HSYS : RuleBasedCollator::compare() performance enhancements 988 compare() does not create CollationElementIterator() anymore.*/ 989 990 class My4146160Collator : public RuleBasedCollator 991 { 992 public: 993 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); 994 ~My4146160Collator(); 995 996 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; 997 998 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; 999 1000 static int32_t count; 1001 }; 1002 1003 int32_t My4146160Collator::count = 0; 1004 1005 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) 1006 : RuleBasedCollator(rbc.getRules(), status) 1007 { 1008 } 1009 1010 My4146160Collator::~My4146160Collator() 1011 { 1012 } 1013 1014 CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const 1015 { 1016 count += 1; 1017 return RuleBasedCollator::createCollationElementIterator(text); 1018 } 1019 1020 CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const 1021 { 1022 count += 1; 1023 return RuleBasedCollator::createCollationElementIterator(text); 1024 } 1025 1026 // @bug 4146160 1027 // 1028 // RuleBasedCollator doesn't use createCollationElementIterator internally 1029 // 1030 void CollationRegressionTest::Test4146160(/* char* par */) 1031 { 1032 #if 0 1033 // 1034 // Use a custom collator class whose createCollationElementIterator 1035 // methods increment a count.... 1036 // 1037 UErrorCode status = U_ZERO_ERROR; 1038 CollationKey key; 1039 1040 My4146160Collator::count = 0; 1041 My4146160Collator *mc = NULL; 1042 1043 mc = new My4146160Collator(*en_us, status); 1044 1045 if (mc == NULL || U_FAILURE(status)) 1046 { 1047 errln("Failed to create a My4146160Collator."); 1048 delete mc; 1049 return; 1050 } 1051 1052 mc->getCollationKey("1", key, status); 1053 1054 if (key.isBogus() || U_FAILURE(status)) 1055 { 1056 errln("Failure to get a CollationKey from a My4146160Collator."); 1057 delete mc; 1058 return; 1059 } 1060 1061 if (My4146160Collator::count < 1) 1062 { 1063 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); 1064 } 1065 1066 My4146160Collator::count = 0; 1067 mc->compare("1", "2"); 1068 1069 if (My4146160Collator::count < 1) 1070 { 1071 errln("My4146160Collator::createtCollationElementIterator not called for compare"); 1072 } 1073 1074 delete mc; 1075 #endif 1076 } 1077 1078 void CollationRegressionTest::Test4179216() { 1079 // you can position a CollationElementIterator in the middle of 1080 // a contracting character sequence, yielding a bogus collation 1081 // element 1082 IcuTestErrorCode errorCode(*this, "Test4179216"); 1083 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode); 1084 UnicodeString testText = "church church catcatcher runcrunchynchy"; 1085 CollationElementIterator *iter = coll.createCollationElementIterator(testText); 1086 1087 // test that the "ch" combination works properly 1088 iter->setOffset(4, errorCode); 1089 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1090 1091 iter->reset(); 1092 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1093 1094 iter->setOffset(5, errorCode); 1095 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1096 1097 // Compares and prints only 16-bit primary weights. 1098 if (elt4 != elt0 || elt5 != elt0) { 1099 errln("The collation elements at positions 0 (0x%04x), " 1100 "4 (0x%04x), and 5 (0x%04x) don't match.", 1101 elt0, elt4, elt5); 1102 } 1103 1104 // test that the "cat" combination works properly 1105 iter->setOffset(14, errorCode); 1106 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1107 1108 iter->setOffset(15, errorCode); 1109 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1110 1111 iter->setOffset(16, errorCode); 1112 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1113 1114 iter->setOffset(17, errorCode); 1115 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1116 1117 iter->setOffset(18, errorCode); 1118 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1119 1120 iter->setOffset(19, errorCode); 1121 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1122 1123 // Compares and prints only 16-bit primary weights. 1124 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 1125 || elt14 != elt18 || elt14 != elt19) { 1126 errln("\"cat\" elements don't match: elt14 = 0x%04x, " 1127 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " 1128 "elt18 = 0x%04x, elt19 = 0x%04x", 1129 elt14, elt15, elt16, elt17, elt18, elt19); 1130 } 1131 1132 // now generate a complete list of the collation elements, 1133 // first using next() and then using setOffset(), and 1134 // make sure both interfaces return the same set of elements 1135 iter->reset(); 1136 1137 int32_t elt = iter->next(errorCode); 1138 int32_t count = 0; 1139 while (elt != CollationElementIterator::NULLORDER) { 1140 ++count; 1141 elt = iter->next(errorCode); 1142 } 1143 1144 LocalArray<UnicodeString> nextElements(new UnicodeString[count]); 1145 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); 1146 int32_t lastPos = 0; 1147 1148 iter->reset(); 1149 elt = iter->next(errorCode); 1150 count = 0; 1151 while (elt != CollationElementIterator::NULLORDER) { 1152 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); 1153 lastPos = iter->getOffset(); 1154 elt = iter->next(errorCode); 1155 } 1156 int32_t nextElementsLength = count; 1157 count = 0; 1158 for (int32_t i = 0; i < testText.length(); ) { 1159 iter->setOffset(i, errorCode); 1160 lastPos = iter->getOffset(); 1161 elt = iter->next(errorCode); 1162 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); 1163 i = iter->getOffset(); 1164 } 1165 for (int32_t i = 0; i < nextElementsLength; i++) { 1166 if (nextElements[i] == setOffsetElements[i]) { 1167 logln(nextElements[i]); 1168 } else { 1169 errln(UnicodeString("Error: next() yielded ") + nextElements[i] + 1170 ", but setOffset() yielded " + setOffsetElements[i]); 1171 } 1172 } 1173 delete iter; 1174 } 1175 1176 // Ticket 7189 1177 // 1178 // nextSortKeyPart incorrect for EO_S1 collation 1179 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { 1180 UCharIterator uiter; 1181 uint32_t state[2] = { 0, 0 }; 1182 int32_t keyLen; 1183 int32_t count = 8; 1184 1185 uiter_setString(&uiter, text, len); 1186 keyLen = 0; 1187 while (TRUE) { 1188 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); 1189 if (U_FAILURE(status)) { 1190 return -1; 1191 } 1192 if (keyPartLen == 0) { 1193 break; 1194 } 1195 keyLen += keyPartLen; 1196 } 1197 return keyLen; 1198 } 1199 1200 void CollationRegressionTest::TestT7189() { 1201 UErrorCode status = U_ZERO_ERROR; 1202 UCollator *coll; 1203 uint32_t i; 1204 1205 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { 1206 // "Achter De Hoven" 1207 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, 1208 // "ABC" 1209 { 0x41, 0x42, 0x43, 0x00 }, 1210 // "HELLO world!" 1211 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } 1212 }; 1213 1214 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { 1215 // "Achter de Hoven" 1216 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, 1217 // "abc" 1218 { 0x61, 0x62, 0x63, 0x00 }, 1219 // "hello world!" 1220 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } 1221 }; 1222 1223 // Open the collator 1224 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); 1225 if (U_FAILURE(status)) { 1226 errln("Failed to create a collator for short string EO_S1"); 1227 return; 1228 } 1229 1230 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) { 1231 uint8_t key1[100], key2[100]; 1232 int32_t len1, len2; 1233 1234 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); 1235 if (U_FAILURE(status)) { 1236 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); 1237 break; 1238 } 1239 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); 1240 if (U_FAILURE(status)) { 1241 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); 1242 break; 1243 } 1244 1245 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { 1246 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); 1247 } else { 1248 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " 1249 + TestUtility::hex(key2, len2)); 1250 } 1251 } 1252 ucol_close(coll); 1253 } 1254 1255 void CollationRegressionTest::TestCaseFirstCompression() { 1256 RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); 1257 UErrorCode status = U_ZERO_ERROR; 1258 1259 // default 1260 caseFirstCompressionSub(col, "default"); 1261 1262 // Upper first 1263 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); 1264 if (U_FAILURE(status)) { 1265 errln("Failed to set UCOL_UPPER_FIRST"); 1266 return; 1267 } 1268 caseFirstCompressionSub(col, "upper first"); 1269 1270 // Lower first 1271 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); 1272 if (U_FAILURE(status)) { 1273 errln("Failed to set UCOL_LOWER_FIRST"); 1274 return; 1275 } 1276 caseFirstCompressionSub(col, "lower first"); 1277 1278 delete col; 1279 } 1280 1281 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) { 1282 const int32_t maxLength = 50; 1283 1284 UChar str1[maxLength]; 1285 UChar str2[maxLength]; 1286 1287 CollationKey key1, key2; 1288 1289 for (int32_t len = 1; len <= maxLength; len++) { 1290 int32_t i = 0; 1291 for (; i < len - 1; i++) { 1292 str1[i] = str2[i] = (UChar)0x61; // 'a' 1293 } 1294 str1[i] = (UChar)0x41; // 'A' 1295 str2[i] = (UChar)0x61; // 'a' 1296 1297 UErrorCode status = U_ZERO_ERROR; 1298 col->getCollationKey(str1, len, key1, status); 1299 col->getCollationKey(str2, len, key2, status); 1300 1301 UCollationResult cmpKey = key1.compareTo(key2, status); 1302 UCollationResult cmpCol = col->compare(str1, len, str2, len, status); 1303 1304 if (U_FAILURE(status)) { 1305 errln("Error in caseFirstCompressionSub"); 1306 } else if (cmpKey != cmpCol) { 1307 errln((UnicodeString)"Inconsistent comparison(" + opt 1308 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len) 1309 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); 1310 } 1311 } 1312 } 1313 1314 void CollationRegressionTest::TestTrailingComment() { 1315 // ICU ticket #8070: 1316 // Check that the rule parser handles a comment without terminating end-of-line. 1317 IcuTestErrorCode errorCode(*this, "TestTrailingComment"); 1318 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode); 1319 UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63); 1320 assertTrue("c<b", coll.compare(c, b) < 0); 1321 assertTrue("b<a", coll.compare(b, a) < 0); 1322 } 1323 1324 void CollationRegressionTest::TestBeforeWithTooStrongAfter() { 1325 // ICU ticket #9959: 1326 // Forbid rules with a before-reset followed by a stronger relation. 1327 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter"); 1328 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode); 1329 if(errorCode.isSuccess()) { 1330 errln("should forbid before-2-reset followed by primary relation"); 1331 } else { 1332 errorCode.reset(); 1333 } 1334 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode); 1335 if(errorCode.isSuccess()) { 1336 errln("should forbid before-3-reset followed by primary or secondary relation"); 1337 } else { 1338 errorCode.reset(); 1339 } 1340 } 1341 1342 void CollationRegressionTest::compareArray(Collator &c, 1343 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], 1344 int32_t testCount) 1345 { 1346 int32_t i; 1347 Collator::EComparisonResult expectedResult = Collator::EQUAL; 1348 1349 for (i = 0; i < testCount; i += 3) 1350 { 1351 UnicodeString source(tests[i]); 1352 UnicodeString comparison(tests[i + 1]); 1353 UnicodeString target(tests[i + 2]); 1354 1355 if (comparison == "<") 1356 { 1357 expectedResult = Collator::LESS; 1358 } 1359 else if (comparison == ">") 1360 { 1361 expectedResult = Collator::GREATER; 1362 } 1363 else if (comparison == "=") 1364 { 1365 expectedResult = Collator::EQUAL; 1366 } 1367 else 1368 { 1369 UnicodeString bogus1("Bogus comparison string \""); 1370 UnicodeString bogus2("\""); 1371 errln(bogus1 + comparison + bogus2); 1372 } 1373 1374 Collator::EComparisonResult compareResult = c.compare(source, target); 1375 1376 CollationKey sourceKey, targetKey; 1377 UErrorCode status = U_ZERO_ERROR; 1378 1379 c.getCollationKey(source, sourceKey, status); 1380 1381 if (U_FAILURE(status)) 1382 { 1383 errln("Couldn't get collationKey for source"); 1384 continue; 1385 } 1386 1387 c.getCollationKey(target, targetKey, status); 1388 1389 if (U_FAILURE(status)) 1390 { 1391 errln("Couldn't get collationKey for target"); 1392 continue; 1393 } 1394 1395 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); 1396 1397 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); 1398 1399 } 1400 } 1401 1402 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) 1403 { 1404 int32_t c1, c2, count = 0; 1405 UErrorCode status = U_ZERO_ERROR; 1406 1407 do 1408 { 1409 c1 = i1.next(status); 1410 c2 = i2.next(status); 1411 1412 if (c1 != c2) 1413 { 1414 UnicodeString msg, msg1(" "); 1415 1416 msg += msg1 + count; 1417 msg += ": strength(0x"; 1418 appendHex(c1, 8, msg); 1419 msg += ") != strength(0x"; 1420 appendHex(c2, 8, msg); 1421 msg += ")"; 1422 1423 errln(msg); 1424 break; 1425 } 1426 1427 count += 1; 1428 } 1429 while (c1 != CollationElementIterator::NULLORDER); 1430 } 1431 1432 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) 1433 { 1434 if (exec) 1435 { 1436 logln("Collation Regression Tests: "); 1437 } 1438 1439 if(en_us == NULL) { 1440 dataerrln("Class collator not instantiated"); 1441 name = ""; 1442 return; 1443 } 1444 TESTCASE_AUTO_BEGIN; 1445 TESTCASE_AUTO(Test4048446); 1446 TESTCASE_AUTO(Test4051866); 1447 TESTCASE_AUTO(Test4053636); 1448 TESTCASE_AUTO(Test4054238); 1449 TESTCASE_AUTO(Test4054734); 1450 TESTCASE_AUTO(Test4054736); 1451 TESTCASE_AUTO(Test4058613); 1452 TESTCASE_AUTO(Test4059820); 1453 TESTCASE_AUTO(Test4060154); 1454 TESTCASE_AUTO(Test4062418); 1455 TESTCASE_AUTO(Test4065540); 1456 TESTCASE_AUTO(Test4066189); 1457 TESTCASE_AUTO(Test4066696); 1458 TESTCASE_AUTO(Test4076676); 1459 TESTCASE_AUTO(Test4078588); 1460 TESTCASE_AUTO(Test4079231); 1461 TESTCASE_AUTO(Test4081866); 1462 TESTCASE_AUTO(Test4087241); 1463 TESTCASE_AUTO(Test4087243); 1464 TESTCASE_AUTO(Test4092260); 1465 TESTCASE_AUTO(Test4095316); 1466 TESTCASE_AUTO(Test4101940); 1467 TESTCASE_AUTO(Test4103436); 1468 TESTCASE_AUTO(Test4114076); 1469 TESTCASE_AUTO(Test4114077); 1470 TESTCASE_AUTO(Test4124632); 1471 TESTCASE_AUTO(Test4132736); 1472 TESTCASE_AUTO(Test4133509); 1473 TESTCASE_AUTO(Test4139572); 1474 TESTCASE_AUTO(Test4141640); 1475 TESTCASE_AUTO(Test4146160); 1476 TESTCASE_AUTO(Test4179216); 1477 TESTCASE_AUTO(TestT7189); 1478 TESTCASE_AUTO(TestCaseFirstCompression); 1479 TESTCASE_AUTO(TestTrailingComment); 1480 TESTCASE_AUTO(TestBeforeWithTooStrongAfter); 1481 TESTCASE_AUTO_END; 1482 } 1483 1484 #endif /* #if !UCONFIG_NO_COLLATION */ 1485