// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
//===============================================================================
//
// File apicoll.cpp
//
//
//
// Created by: Helena Shih
//
// Modification History:
//
//  Date        Name        Description
//   2/5/97     aliu        Added streamIn and streamOut methods.  Added
//                          constructor which reads RuleBasedCollator object from
//                          a binary file.  Added writeToFile method which streams
//                          RuleBasedCollator out to a binary file.  The streamIn
//                          and streamOut methods use istream and ostream objects
//                          in binary mode.
//  6/30/97     helena      Added tests for CollationElementIterator::setText, getOffset
//                          setOffset and DecompositionIterator::getOffset, setOffset.
//                          DecompositionIterator is made public so add class scope
//                          testing.
29 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t) 30 //=============================================================================== 31 32 #include "unicode/utypes.h" 33 34 #if !UCONFIG_NO_COLLATION 35 36 #include "unicode/localpointer.h" 37 #include "unicode/coll.h" 38 #include "unicode/tblcoll.h" 39 #include "unicode/coleitr.h" 40 #include "unicode/sortkey.h" 41 #include "apicoll.h" 42 #include "unicode/chariter.h" 43 #include "unicode/schriter.h" 44 #include "unicode/strenum.h" 45 #include "unicode/ustring.h" 46 #include "unicode/ucol.h" 47 48 #include "sfwdchit.h" 49 #include "cmemory.h" 50 #include <stdlib.h> 51 52 void 53 CollationAPITest::doAssert(UBool condition, const char *message) 54 { 55 if (!condition) { 56 errln(UnicodeString("ERROR : ") + message); 57 } 58 } 59 60 // Collator Class Properties 61 // ctor, dtor, createInstance, compare, getStrength/setStrength 62 // getDecomposition/setDecomposition, getDisplayName 63 void 64 CollationAPITest::TestProperty(/* char* par */) 65 { 66 UErrorCode success = U_ZERO_ERROR; 67 Collator *col = 0; 68 /* 69 * Expected version of the English collator. 70 * Currently, the major/minor version numbers change when the builder code 71 * changes, 72 * number 2 is from the tailoring data version and 73 * number 3 is the UCA version. 74 * This changes with every UCA version change, and the expected value 75 * needs to be adjusted. 76 * Same in cintltst/capitst.c. 77 */ 78 UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2 79 UVersionInfo versionArray; 80 81 logln("The property tests begin : "); 82 logln("Test ctors : "); 83 col = Collator::createInstance(Locale::getEnglish(), success); 84 if (U_FAILURE(success)){ 85 errcheckln(success, "English Collator creation failed. 
- %s", u_errorName(success)); 86 return; 87 } 88 89 col->getVersion(versionArray); 90 // Check for a version greater than some value rather than equality 91 // so that we need not update the expected version each time. 92 if (uprv_memcmp(versionArray, currVersionArray, 4)<0) { 93 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x", 94 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); 95 } else { 96 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x", 97 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); 98 } 99 100 doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed"); 101 doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed"); 102 doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed"); 103 doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed"); 104 doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed"); 105 doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed"); 106 107 doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed"); 108 success = U_ZERO_ERROR; 109 { 110 UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape(); 111 UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape(); 112 UCharIterator abauIter, abssIter; 113 uiter_setReplaceable(&abauIter, &abau); 114 uiter_setReplaceable(&abssIter, &abss); 115 doAssert((col->compare(abauIter, abssIter, success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed"); 116 success = U_ZERO_ERROR; 117 } 118 119 /*start of update [Bertrand A. D. 
02/10/98]*/ 120 doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed"); 121 doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB with length 2 comparison failed"); 122 doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa with length 1 comparison failed"); 123 doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa with length 2 comparison failed"); 124 doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed"); 125 doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed"); 126 doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed"); 127 /*end of update [Bertrand A. D. 02/10/98]*/ 128 129 130 logln("Test ctors ends."); 131 logln("testing Collator::getStrength() method ..."); 132 doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength"); 133 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference"); 134 135 136 logln("testing Collator::setStrength() method ..."); 137 col->setStrength(Collator::SECONDARY); 138 doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference"); 139 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference"); 140 doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength"); 141 142 UnicodeString name; 143 144 logln("Get display name for the US English collation in German : "); 145 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name)); 146 doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed"); 147 148 logln("Get display name for the US English collation in English : "); 149 
logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name)); 150 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed"); 151 #if 0 152 // weiv : this test is bogus if we're running on any machine that has different default locale than English. 153 // Therefore, it is banned! 154 logln("Get display name for the US English in default locale language : "); 155 logln(Collator::getDisplayName(Locale::US, name)); 156 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine"); 157 #endif 158 delete col; col = 0; 159 RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK", 160 success); 161 if (U_FAILURE(success)) { 162 errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success)); 163 return; 164 } 165 const UnicodeString &daRules = rcol->getRules(); 166 if(daRules.isEmpty()) { 167 dataerrln("missing da_DK tailoring rule string"); 168 } else { 169 doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'"); 170 } 171 delete rcol; 172 173 col = Collator::createInstance(Locale::getFrench(), success); 174 if (U_FAILURE(success)) 175 { 176 errln("Creating French collation failed."); 177 return; 178 } 179 180 col->setStrength(Collator::PRIMARY); 181 logln("testing Collator::getStrength() method again ..."); 182 doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength"); 183 doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference"); 184 185 logln("testing French Collator::setStrength() method ..."); 186 col->setStrength(Collator::TERTIARY); 187 doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference"); 188 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference"); 189 doAssert((col->getStrength() != Collator::SECONDARY), 
"collation object's strength is secondary difference"); 190 delete col; 191 192 logln("Create junk collation: "); 193 Locale abcd("ab", "CD", ""); 194 success = U_ZERO_ERROR; 195 Collator *junk = 0; 196 junk = Collator::createInstance(abcd, success); 197 198 if (U_FAILURE(success)) 199 { 200 errln("Junk collation creation failed, should at least return default."); 201 return; 202 } 203 204 doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(), 205 "The root collation should be returned for an unsupported language."); 206 Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success); 207 if (U_FAILURE(success)) 208 { 209 errln("Creating fr_CA collator failed."); 210 delete junk; 211 return; 212 } 213 214 // If the default locale isn't French, the French and non-French collators 215 // should be different 216 if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) { 217 doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator."); 218 } 219 Collator *aFrCol = frCol->clone(); 220 doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed."); 221 logln("Collator property test ended."); 222 223 delete frCol; 224 delete aFrCol; 225 delete junk; 226 } 227 228 void CollationAPITest::TestKeywordValues() { 229 IcuTestErrorCode errorCode(*this, "TestKeywordValues"); 230 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode)); 231 if (errorCode.errIfFailureAndReset("English Collator creation failed")) { 232 return; 233 } 234 235 LocalPointer<StringEnumeration> kwEnum( 236 col->getKeywordValuesForLocale("collation", Locale::getEnglish(), TRUE, errorCode)); 237 if (errorCode.errIfFailureAndReset("Get Keyword Values for English Collator failed")) { 238 return; 239 } 240 assertTrue("expect at least one collation tailoring for English", kwEnum->count(errorCode) > 0); 241 const char *kw; 242 UBool hasStandard = FALSE; 243 while ((kw = kwEnum->next(NULL, errorCode)) != NULL) { 244 if 
(strcmp(kw, "standard") == 0) { 245 hasStandard = TRUE; 246 } 247 } 248 assertTrue("expect at least the 'standard' collation tailoring for English", hasStandard); 249 } 250 251 void 252 CollationAPITest::TestRuleBasedColl() 253 { 254 RuleBasedCollator *col1, *col2, *col3, *col4; 255 UErrorCode status = U_ZERO_ERROR; 256 257 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E"); 258 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E"); 259 260 col1 = new RuleBasedCollator(ruleset1, status); 261 if (U_FAILURE(status)) { 262 errcheckln(status, "RuleBased Collator creation failed. - %s", u_errorName(status)); 263 return; 264 } 265 else { 266 logln("PASS: RuleBased Collator creation passed\n"); 267 } 268 269 status = U_ZERO_ERROR; 270 col2 = new RuleBasedCollator(ruleset2, status); 271 if (U_FAILURE(status)) { 272 errln("RuleBased Collator creation failed.\n"); 273 return; 274 } 275 else { 276 logln("PASS: RuleBased Collator creation passed\n"); 277 } 278 279 status = U_ZERO_ERROR; 280 Locale locale("aa", "AA"); 281 col3 = (RuleBasedCollator *)Collator::createInstance(locale, status); 282 if (U_FAILURE(status)) { 283 errln("Fallback Collator creation failed.: %s\n"); 284 return; 285 } 286 else { 287 logln("PASS: Fallback Collator creation passed\n"); 288 } 289 delete col3; 290 291 status = U_ZERO_ERROR; 292 col3 = (RuleBasedCollator *)Collator::createInstance(status); 293 if (U_FAILURE(status)) { 294 errln("Default Collator creation failed.: %s\n"); 295 return; 296 } 297 else { 298 logln("PASS: Default Collator creation passed\n"); 299 } 300 301 UnicodeString rule1 = col1->getRules(); 302 UnicodeString rule2 = col2->getRules(); 303 UnicodeString rule3 = col3->getRules(); 304 305 doAssert(rule1 != rule2, "Default collator getRules failed"); 306 doAssert(rule2 != rule3, "Default collator getRules failed"); 307 doAssert(rule1 != rule3, "Default collator getRules failed"); 308 309 col4 = new RuleBasedCollator(rule2, status); 310 if 
(U_FAILURE(status)) { 311 errln("RuleBased Collator creation failed.\n"); 312 return; 313 } 314 315 UnicodeString rule4 = col4->getRules(); 316 doAssert(rule2 == rule4, "Default collator getRules failed"); 317 int32_t length4 = 0; 318 uint8_t *clonedrule4 = col4->cloneRuleData(length4, status); 319 if (U_FAILURE(status)) { 320 errln("Cloned rule data failed.\n"); 321 return; 322 } 323 324 // free(clonedrule4); BAD API!!!! 325 uprv_free(clonedrule4); 326 327 328 delete col1; 329 delete col2; 330 delete col3; 331 delete col4; 332 } 333 334 void 335 CollationAPITest::TestRules() 336 { 337 RuleBasedCollator *coll; 338 UErrorCode status = U_ZERO_ERROR; 339 UnicodeString rules; 340 341 coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status); 342 if (U_FAILURE(status)) { 343 errcheckln(status, "English Collator creation failed. - %s", u_errorName(status)); 344 return; 345 } 346 else { 347 logln("PASS: RuleBased Collator creation passed\n"); 348 } 349 350 coll->getRules(UCOL_TAILORING_ONLY, rules); 351 if (rules.length() != 0x00) { 352 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00); 353 } 354 355 coll->getRules(UCOL_FULL_RULES, rules); 356 if (rules.length() < 0) { 357 errln("English full rules failed"); 358 } 359 delete coll; 360 } 361 362 void 363 CollationAPITest::TestDecomposition() { 364 UErrorCode status = U_ZERO_ERROR; 365 Collator *en_US = Collator::createInstance("en_US", status), 366 *el_GR = Collator::createInstance("el_GR", status), 367 *vi_VN = Collator::createInstance("vi_VN", status); 368 369 if (U_FAILURE(status)) { 370 errcheckln(status, "ERROR: collation creation failed. 
- %s", u_errorName(status)); 371 return; 372 } 373 374 /* there is no reason to have canonical decomposition in en_US OR default locale */ 375 if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON) 376 { 377 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n"); 378 } 379 380 if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON) 381 { 382 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n"); 383 } 384 385 if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) 386 { 387 errln("ERROR: en_US collation had canonical decomposition for normalization!\n"); 388 } 389 390 delete en_US; 391 delete el_GR; 392 delete vi_VN; 393 } 394 395 void 396 CollationAPITest::TestSafeClone() { 397 static const int CLONETEST_COLLATOR_COUNT = 3; 398 Collator *someCollators [CLONETEST_COLLATOR_COUNT]; 399 Collator *col; 400 UErrorCode err = U_ZERO_ERROR; 401 int index; 402 403 UnicodeString test1("abCda"); 404 UnicodeString test2("abcda"); 405 406 /* one default collator & two complex ones */ 407 someCollators[0] = Collator::createInstance("en_US", err); 408 someCollators[1] = Collator::createInstance("ko", err); 409 someCollators[2] = Collator::createInstance("ja_JP", err); 410 if(U_FAILURE(err)) { 411 errcheckln(err, "Couldn't instantiate collators. 
Error: %s", u_errorName(err)); 412 delete someCollators[0]; 413 delete someCollators[1]; 414 delete someCollators[2]; 415 return; 416 } 417 418 /* change orig & clone & make sure they are independent */ 419 420 for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++) 421 { 422 col = someCollators[index]->safeClone(); 423 if (col == 0) { 424 errln("SafeClone of collator should not return null\n"); 425 break; 426 } 427 col->setStrength(Collator::TERTIARY); 428 someCollators[index]->setStrength(Collator::PRIMARY); 429 col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err); 430 someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err); 431 432 doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" "); 433 doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\""); 434 delete col; 435 delete someCollators[index]; 436 } 437 } 438 439 void 440 CollationAPITest::TestHashCode(/* char* par */) 441 { 442 logln("hashCode tests begin."); 443 UErrorCode success = U_ZERO_ERROR; 444 Collator *col1 = 0; 445 col1 = Collator::createInstance(Locale::getEnglish(), success); 446 if (U_FAILURE(success)) 447 { 448 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 449 return; 450 } 451 452 Collator *col2 = 0; 453 Locale dk("da", "DK", ""); 454 col2 = Collator::createInstance(dk, success); 455 if (U_FAILURE(success)) 456 { 457 errln("Danish collation creation failed."); 458 return; 459 } 460 461 Collator *col3 = 0; 462 col3 = Collator::createInstance(Locale::getEnglish(), success); 463 if (U_FAILURE(success)) 464 { 465 errln("2nd default collation creation failed."); 466 return; 467 } 468 469 logln("Collator::hashCode() testing ..."); 470 471 doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" ); 472 doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" ); 473 doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" ); 474 475 logln("hashCode tests end."); 476 delete col1; 477 delete col2; 478 479 UnicodeString test1("Abcda"); 480 UnicodeString test2("abcda"); 481 482 CollationKey sortk1, sortk2, sortk3; 483 UErrorCode status = U_ZERO_ERROR; 484 485 col3->getCollationKey(test1, sortk1, status); 486 col3->getCollationKey(test2, sortk2, status); 487 col3->getCollationKey(test2, sortk3, status); 488 489 doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect"); 490 doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" ); 491 492 delete col3; 493 } 494 495 //---------------------------------------------------------------------------- 496 // CollationKey -- Tests the CollationKey methods 497 // 498 void 499 CollationAPITest::TestCollationKey(/* char* par */) 500 { 501 logln("testing CollationKey begins..."); 502 Collator *col = 0; 503 UErrorCode success=U_ZERO_ERROR; 504 col = Collator::createInstance(Locale::getEnglish(), success); 505 if (U_FAILURE(success)) 506 { 507 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 508 return; 509 } 510 col->setStrength(Collator::TERTIARY); 511 512 CollationKey sortk1, sortk2; 513 UnicodeString test1("Abcda"), test2("abcda"); 514 UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR; 515 516 logln("Testing weird arguments"); 517 // No string vs. empty string vs. completely-ignorable string: 518 // See ICU ticket #10495. 519 CollationKey sortkNone; 520 int32_t length; 521 sortkNone.getByteArray(length); 522 doAssert(!sortkNone.isBogus() && length == 0, 523 "Default-constructed collation key should be empty"); 524 CollationKey sortkEmpty; 525 col->getCollationKey(NULL, 0, sortkEmpty, key1Status); 526 // key gets reset here 527 const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length); 528 doAssert(sortkEmpty.isBogus() == FALSE && length == 3 && 529 byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0, 530 "Empty string should return a collation key with empty levels"); 531 doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS, 532 "Expected no collation key < collation key for empty string"); 533 doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER, 534 "Expected collation key for empty string > no collation key"); 535 536 CollationKey sortkIgnorable; 537 // Most control codes and CGJ are completely ignorable. 538 // A string with only completely ignorables must compare equal to an empty string. 
539 col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status); 540 sortkIgnorable.getByteArray(length); 541 doAssert(!sortkIgnorable.isBogus() && length == 3, 542 "Completely ignorable string should return a collation key with empty levels"); 543 doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL, 544 "Completely ignorable string should compare equal to empty string"); 545 546 // bogus key returned here 547 key1Status = U_ILLEGAL_ARGUMENT_ERROR; 548 col->getCollationKey(NULL, 0, sortk1, key1Status); 549 doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0, 550 "Error code should return bogus collation key"); 551 552 key1Status = U_ZERO_ERROR; 553 logln("Use tertiary comparison level testing ...."); 554 555 col->getCollationKey(test1, sortk1, key1Status); 556 if (U_FAILURE(key1Status)) { 557 errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status)); 558 return; 559 } 560 doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status))) 561 == Collator::GREATER, 562 "Result should be \"Abcda\" >>> \"abcda\""); 563 564 CollationKey sortk3(sortk2), sortkNew; 565 566 sortkNew = sortk1; 567 doAssert((sortk1 != sortk2), "The sort keys should be different"); 568 doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed"); 569 doAssert((sortk2 == sortk3), "The sort keys should be the same"); 570 doAssert((sortk1 == sortkNew), "The sort keys assignment failed"); 571 doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed"); 572 doAssert((sortkNew != sortk3), "The sort keys should be different"); 573 doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\""); 574 doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\""); 575 doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\""); 576 
doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)"); 577 doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)"); 578 doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\""); 579 doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\""); 580 doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\""); 581 doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)"); 582 doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)"); 583 584 int32_t cnt1, cnt2, cnt3, cnt4; 585 586 const uint8_t* byteArray1 = sortk1.getByteArray(cnt1); 587 const uint8_t* byteArray2 = sortk2.getByteArray(cnt2); 588 589 const uint8_t* byteArray3 = 0; 590 byteArray3 = sortk1.getByteArray(cnt3); 591 592 const uint8_t* byteArray4 = 0; 593 byteArray4 = sortk2.getByteArray(cnt4); 594 595 CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2); 596 CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4); 597 598 doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed."); 599 doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed."); 600 doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed"); 601 doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed."); 602 doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed."); 603 doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed"); 604 605 logln("Equality tests : "); 606 doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed."); 607 doAssert(sortk2 == sortk5, "sortk2 == sortk5 
Failed."); 608 doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed."); 609 doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed."); 610 doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed."); 611 doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed."); 612 613 byteArray1 = 0; 614 byteArray2 = 0; 615 616 sortk3 = sortk1; 617 doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed."); 618 doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed."); 619 logln("testing sortkey ends..."); 620 621 col->setStrength(Collator::SECONDARY); 622 doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo( 623 col->getCollationKey(test2, sortk2, key2Status)) 624 == Collator::EQUAL, 625 "Result should be \"Abcda\" == \"abcda\""); 626 delete col; 627 } 628 629 //---------------------------------------------------------------------------- 630 // Tests the CollatorElementIterator class. 631 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!= 632 // 633 void 634 CollationAPITest::TestElemIter(/* char* par */) 635 { 636 logln("testing sortkey begins..."); 637 Collator *col = 0; 638 UErrorCode success = U_ZERO_ERROR; 639 col = Collator::createInstance(Locale::getEnglish(), success); 640 if (U_FAILURE(success)) 641 { 642 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 643 return; 644 } 645 646 UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"); 647 UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"); 648 logln("Constructors and comparison testing...."); 649 CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1); 650 651 CharacterIterator *chariter=new StringCharacterIterator(testString1); 652 CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter); 653 654 // copy ctor 655 CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1); 656 CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2); 657 658 int32_t offset = iterator1->getOffset(); 659 if (offset != 0) { 660 errln("Error in getOffset for collation element iterator\n"); 661 return; 662 } 663 iterator1->setOffset(6, success); 664 if (U_FAILURE(success)) { 665 errln("Error in setOffset for collation element iterator\n"); 666 return; 667 } 668 iterator1->setOffset(0, success); 669 int32_t order1, order2, order3; 670 doAssert((*iterator1 == *iterator2), "The two iterators should be the same"); 671 doAssert((*iterator1 != *iterator3), "The two iterators should be different"); 672 673 doAssert((*coliter == *iterator1), "The two iterators should be the same"); 674 doAssert((*coliter == *iterator2), "The two iterators should be the same"); 675 doAssert((*coliter != *iterator3), "The two iterators should be different"); 676 677 order1 = iterator1->next(success); 678 if (U_FAILURE(success)) 679 { 680 errln("Somehow ran out of memory stepping through the iterator."); 681 return; 682 } 683 684 doAssert((*iterator1 != *iterator2), "The first iterator advance failed"); 685 order2 = 
iterator2->getOffset(); 686 doAssert((order1 != order2), "The order result should not be the same"); 687 order2 = iterator2->next(success); 688 if (U_FAILURE(success)) 689 { 690 errln("Somehow ran out of memory stepping through the iterator."); 691 return; 692 } 693 694 doAssert((*iterator1 == *iterator2), "The second iterator advance failed"); 695 doAssert((order1 == order2), "The order result should be the same"); 696 order3 = iterator3->next(success); 697 if (U_FAILURE(success)) 698 { 699 errln("Somehow ran out of memory stepping through the iterator."); 700 return; 701 } 702 703 doAssert((CollationElementIterator::primaryOrder(order1) == 704 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same"); 705 doAssert((CollationElementIterator::secondaryOrder(order1) == 706 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same"); 707 doAssert((CollationElementIterator::tertiaryOrder(order1) == 708 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same"); 709 710 order1 = iterator1->next(success); order3 = iterator3->next(success); 711 if (U_FAILURE(success)) 712 { 713 errln("Somehow ran out of memory stepping through the iterator."); 714 return; 715 } 716 717 doAssert((CollationElementIterator::primaryOrder(order1) == 718 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical"); 719 doAssert((CollationElementIterator::tertiaryOrder(order1) != 720 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different"); 721 722 order1 = iterator1->next(success); 723 order3 = iterator3->next(success); 724 /* NO! 
Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */ 725 /* 726 doAssert((CollationElementIterator::secondaryOrder(order1) != 727 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same"); 728 */ 729 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached"); 730 731 iterator1->reset(); iterator2->reset(); iterator3->reset(); 732 order1 = iterator1->next(success); 733 if (U_FAILURE(success)) 734 { 735 errln("Somehow ran out of memory stepping through the iterator."); 736 return; 737 } 738 739 doAssert((*iterator1 != *iterator2), "The first iterator advance failed"); 740 741 order2 = iterator2->next(success); 742 if (U_FAILURE(success)) 743 { 744 errln("Somehow ran out of memory stepping through the iterator."); 745 return; 746 } 747 748 doAssert((*iterator1 == *iterator2), "The second iterator advance failed"); 749 doAssert((order1 == order2), "The order result should be the same"); 750 751 order3 = iterator3->next(success); 752 if (U_FAILURE(success)) 753 { 754 errln("Somehow ran out of memory stepping through the iterator."); 755 return; 756 } 757 758 doAssert((CollationElementIterator::primaryOrder(order1) == 759 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same"); 760 doAssert((CollationElementIterator::secondaryOrder(order1) == 761 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same"); 762 doAssert((CollationElementIterator::tertiaryOrder(order1) == 763 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same"); 764 765 order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success); 766 if (U_FAILURE(success)) 767 { 768 errln("Somehow ran out of memory stepping through the iterator."); 769 return; 770 } 771 772 doAssert((CollationElementIterator::primaryOrder(order1) == 773 
CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical"); 774 doAssert((CollationElementIterator::tertiaryOrder(order1) != 775 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different"); 776 777 order1 = iterator1->next(success); order3 = iterator3->next(success); 778 if (U_FAILURE(success)) 779 { 780 errln("Somehow ran out of memory stepping through the iterator."); 781 return; 782 } 783 784 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */ 785 /* 786 doAssert((CollationElementIterator::secondaryOrder(order1) != 787 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same"); 788 */ 789 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached"); 790 doAssert((*iterator2 != *iterator3), "The iterators should be different"); 791 792 793 //test error values 794 success=U_UNSUPPORTED_ERROR; 795 Collator *colerror=NULL; 796 colerror=Collator::createInstance(Locale::getEnglish(), success); 797 if (colerror != 0 || success == U_ZERO_ERROR){ 798 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n"); 799 } 800 int32_t position=coliter->previous(success); 801 if(position != CollationElementIterator::NULLORDER){ 802 errln((UnicodeString)"Expected NULLORDER got" + position); 803 } 804 coliter->reset(); 805 coliter->setText(*chariter, success); 806 if(!U_FAILURE(success)){ 807 errln("Expeceted error"); 808 } 809 iterator1->setText((UnicodeString)"hello there", success); 810 if(!U_FAILURE(success)){ 811 errln("Expeceted error"); 812 } 813 814 delete chariter; 815 delete coliter; 816 delete iterator1; 817 delete iterator2; 818 delete iterator3; 819 delete col; 820 821 822 823 logln("testing CollationElementIterator ends..."); 824 } 825 826 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules 827 void 828 
CollationAPITest::TestOperators(/* char* par */) 829 { 830 UErrorCode success = U_ZERO_ERROR; 831 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E"); 832 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E"); 833 RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success); 834 if (U_FAILURE(success)) { 835 errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success)); 836 return; 837 } 838 success = U_ZERO_ERROR; 839 RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success); 840 if (U_FAILURE(success)) { 841 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set."); 842 return; 843 } 844 logln("The operator tests begin : "); 845 logln("testing operator==, operator!=, clone methods ..."); 846 doAssert((*col1 != *col2), "The two different table collations compared equal"); 847 *col1 = *col2; 848 doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)"); 849 850 success = U_ZERO_ERROR; 851 Collator *col3 = Collator::createInstance(Locale::getEnglish(), success); 852 if (U_FAILURE(success)) { 853 errln("Default collation creation failed."); 854 return; 855 } 856 doAssert((*col1 != *col3), "The two different table collations compared equal"); 857 Collator* col4 = col1->clone(); 858 Collator* col5 = col3->clone(); 859 doAssert((*col1 == *col4), "Cloned collation objects not equal"); 860 doAssert((*col3 != *col4), "Two different table collations compared equal"); 861 doAssert((*col3 == *col5), "Cloned collation objects not equal"); 862 doAssert((*col4 != *col5), "Two cloned collations compared equal"); 863 864 const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules(); 865 RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success); 866 if (U_FAILURE(success)) { 867 errln("Creating default collation with rules failed."); 868 return; 869 } 870 doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default 
collator getRules failed"); 871 872 success = U_ZERO_ERROR; 873 RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success); 874 if (U_FAILURE(success)) { 875 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength."); 876 return; 877 } 878 success = U_ZERO_ERROR; 879 RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success); 880 if (U_FAILURE(success)) { 881 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP."); 882 return; 883 } 884 success = U_ZERO_ERROR; 885 RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success); 886 if (U_FAILURE(success)) { 887 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP."); 888 return; 889 } 890 // doAssert((*col7 == *col8), "The two equal table collations compared different"); 891 doAssert((*col7 != *col9), "The two different table collations compared equal"); 892 doAssert((*col8 != *col9), "The two different table collations compared equal"); 893 894 logln("operator tests ended."); 895 delete col1; 896 delete col2; 897 delete col3; 898 delete col4; 899 delete col5; 900 delete col6; 901 delete col7; 902 delete col8; 903 delete col9; 904 } 905 906 // test clone and copy 907 void 908 CollationAPITest::TestDuplicate(/* char* par */) 909 { 910 UErrorCode status = U_ZERO_ERROR; 911 Collator *col1 = Collator::createInstance(Locale::getEnglish(), status); 912 if (U_FAILURE(status)) { 913 logln("Default collator creation failed."); 914 return; 915 } 916 Collator *col2 = col1->clone(); 917 doAssert((*col1 == *col2), "Cloned object is not equal to the orginal"); 918 UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E"); 919 RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status); 920 if (U_FAILURE(status)) { 921 logln("Collation tailoring failed."); 922 return; 
923 } 924 doAssert((*col1 != *col3), "Cloned object is equal to some dummy"); 925 *col3 = *((RuleBasedCollator*)col1); 926 doAssert((*col1 == *col3), "Copied object is not equal to the orginal"); 927 928 UCollationResult res; 929 UnicodeString first((UChar)0x0061); 930 UnicodeString second((UChar)0x0062); 931 UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules()); 932 933 delete col1; 934 935 // Try using the cloned collators after deleting the original data 936 res = col2->compare(first, second, status); 937 if(res != UCOL_LESS) { 938 errln("a should be less then b after tailoring"); 939 } 940 if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) { 941 errln(UnicodeString("English rule difference. ") 942 + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules()); 943 } 944 res = col3->compare(first, second, status); 945 if(res != UCOL_LESS) { 946 errln("a should be less then b after tailoring"); 947 } 948 if (col3->getRules() != copiedEnglishRules) { 949 errln(UnicodeString("English rule difference. ") 950 + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules()); 951 } 952 953 delete col2; 954 delete col3; 955 } 956 957 void 958 CollationAPITest::TestCompare(/* char* par */) 959 { 960 logln("The compare tests begin : "); 961 Collator *col = 0; 962 UErrorCode success = U_ZERO_ERROR; 963 col = Collator::createInstance(Locale::getEnglish(), success); 964 if (U_FAILURE(success)) { 965 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 966 return; 967 } 968 UnicodeString test1("Abcda"), test2("abcda"); 969 logln("Use tertiary comparison level testing ...."); 970 971 doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\""); 972 doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\""); 973 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\""); 974 975 col->setStrength(Collator::SECONDARY); 976 logln("Use secondary comparison level testing ...."); 977 978 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 979 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 980 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 981 982 col->setStrength(Collator::PRIMARY); 983 logln("Use primary comparison level testing ...."); 984 985 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 986 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 987 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 988 989 // Test different APIs 990 const UChar* t1 = test1.getBuffer(); 991 int32_t t1Len = test1.length(); 992 const UChar* t2 = test2.getBuffer(); 993 int32_t t2Len = test2.length(); 994 995 doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem"); 996 doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem"); 997 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem"); 998 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem"); 999 doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem"); 1000 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem"); 1001 1002 col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success); 1003 doAssert((col->compare(test1, test2) == Collator::GREATER), 
"Problem"); 1004 doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem"); 1005 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem"); 1006 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem"); 1007 doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem"); 1008 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem"); 1009 1010 1011 1012 logln("The compare tests end."); 1013 delete col; 1014 } 1015 1016 void 1017 CollationAPITest::TestGetAll(/* char* par */) 1018 { 1019 int32_t count1, count2; 1020 UErrorCode status = U_ZERO_ERROR; 1021 1022 logln("Trying Collator::getAvailableLocales(int&)"); 1023 1024 const Locale* list = Collator::getAvailableLocales(count1); 1025 for (int32_t i = 0; i < count1; ++i) { 1026 UnicodeString dispName; 1027 logln(UnicodeString("Locale name: ") 1028 + UnicodeString(list[i].getName()) 1029 + UnicodeString(" , the display name is : ") 1030 + UnicodeString(list[i].getDisplayName(dispName))); 1031 } 1032 1033 if (count1 == 0 || list == NULL) { 1034 dataerrln("getAvailableLocales(int&) returned an empty list"); 1035 } 1036 1037 logln("Trying Collator::getAvailableLocales()"); 1038 StringEnumeration* localeEnum = Collator::getAvailableLocales(); 1039 const UnicodeString* locStr; 1040 const char *locCStr; 1041 count2 = 0; 1042 1043 if (localeEnum == NULL) { 1044 dataerrln("getAvailableLocales() returned NULL"); 1045 return; 1046 } 1047 1048 while ((locStr = localeEnum->snext(status)) != NULL) 1049 { 1050 logln(UnicodeString("Locale name is: ") + *locStr); 1051 count2++; 1052 } 1053 if (count1 != count2) { 1054 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2); 1055 } 1056 1057 logln("Trying Collator::getAvailableLocales() clone"); 1058 count1 = 0; 1059 StringEnumeration* localeEnum2 = localeEnum->clone(); 1060 localeEnum2->reset(status); 1061 while ((locCStr = 
localeEnum2->next(NULL, status)) != NULL) 1062 { 1063 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr)); 1064 count1++; 1065 } 1066 if (count1 != count2) { 1067 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2); 1068 } 1069 if (localeEnum->count(status) != count1) { 1070 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1); 1071 } 1072 delete localeEnum; 1073 delete localeEnum2; 1074 } 1075 1076 void CollationAPITest::TestSortKey() 1077 { 1078 UErrorCode status = U_ZERO_ERROR; 1079 /* 1080 this is supposed to open default date format, but later on it treats 1081 it like it is "en_US" 1082 - very bad if you try to run the tests on machine where default 1083 locale is NOT "en_US" 1084 */ 1085 Collator *col = Collator::createInstance(Locale::getEnglish(), status); 1086 if (U_FAILURE(status)) { 1087 errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status)); 1088 return; 1089 } 1090 1091 if (col->getStrength() != Collator::TERTIARY) 1092 { 1093 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n"); 1094 } 1095 1096 /* Need to use identical strength */ 1097 col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status); 1098 1099 UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0}, 1100 test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0}, 1101 test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0}; 1102 1103 uint8_t sortkey1[64]; 1104 uint8_t sortkey2[64]; 1105 uint8_t sortkey3[64]; 1106 1107 logln("Use tertiary comparison level testing ....\n"); 1108 1109 CollationKey key1; 1110 col->getCollationKey(test1, u_strlen(test1), key1, status); 1111 1112 CollationKey key2; 1113 col->getCollationKey(test2, u_strlen(test2), key2, status); 1114 1115 CollationKey key3; 1116 col->getCollationKey(test3, u_strlen(test3), key3, status); 1117 1118 doAssert(key1.compareTo(key2) == Collator::GREATER, 1119 "Result should be 
\"Abcda\" > \"abcda\""); 1120 doAssert(key2.compareTo(key1) == Collator::LESS, 1121 "Result should be \"abcda\" < \"Abcda\""); 1122 doAssert(key2.compareTo(key3) == Collator::EQUAL, 1123 "Result should be \"abcda\" == \"abcda\""); 1124 1125 // Clone the key2 sortkey for later. 1126 int32_t keylength = 0; 1127 const uint8_t *key2primary_alias = key2.getByteArray(keylength); 1128 LocalArray<uint8_t> key2primary(new uint8_t[keylength]); 1129 memcpy(key2primary.getAlias(), key2primary_alias, keylength); 1130 1131 col->getSortKey(test1, sortkey1, 64); 1132 col->getSortKey(test2, sortkey2, 64); 1133 col->getSortKey(test3, sortkey3, 64); 1134 1135 const uint8_t *tempkey = key1.getByteArray(keylength); 1136 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1137 "Test1 string should have the same collation key and sort key"); 1138 tempkey = key2.getByteArray(keylength); 1139 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1140 "Test2 string should have the same collation key and sort key"); 1141 tempkey = key3.getByteArray(keylength); 1142 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1143 "Test3 string should have the same collation key and sort key"); 1144 1145 col->getSortKey(test1, 5, sortkey1, 64); 1146 col->getSortKey(test2, 5, sortkey2, 64); 1147 col->getSortKey(test3, 5, sortkey3, 64); 1148 1149 tempkey = key1.getByteArray(keylength); 1150 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1151 "Test1 string should have the same collation key and sort key"); 1152 tempkey = key2.getByteArray(keylength); 1153 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1154 "Test2 string should have the same collation key and sort key"); 1155 tempkey = key3.getByteArray(keylength); 1156 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1157 "Test3 string should have the same collation key and sort key"); 1158 1159 UnicodeString strtest1(test1); 1160 col->getSortKey(strtest1, sortkey1, 64); 1161 UnicodeString strtest2(test2); 1162 col->getSortKey(strtest2, 
sortkey2, 64); 1163 UnicodeString strtest3(test3); 1164 col->getSortKey(strtest3, sortkey3, 64); 1165 1166 tempkey = key1.getByteArray(keylength); 1167 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1168 "Test1 string should have the same collation key and sort key"); 1169 tempkey = key2.getByteArray(keylength); 1170 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1171 "Test2 string should have the same collation key and sort key"); 1172 tempkey = key3.getByteArray(keylength); 1173 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1174 "Test3 string should have the same collation key and sort key"); 1175 1176 logln("Use secondary comparision level testing ...\n"); 1177 col->setStrength(Collator::SECONDARY); 1178 1179 col->getCollationKey(test1, u_strlen(test1), key1, status); 1180 col->getCollationKey(test2, u_strlen(test2), key2, status); 1181 col->getCollationKey(test3, u_strlen(test3), key3, status); 1182 1183 doAssert(key1.compareTo(key2) == Collator::EQUAL, 1184 "Result should be \"Abcda\" == \"abcda\""); 1185 doAssert(key2.compareTo(key3) == Collator::EQUAL, 1186 "Result should be \"abcda\" == \"abcda\""); 1187 1188 tempkey = key2.getByteArray(keylength); 1189 doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0, 1190 "Binary format for 'abcda' sortkey different for secondary strength!"); 1191 1192 col->getSortKey(test1, sortkey1, 64); 1193 col->getSortKey(test2, sortkey2, 64); 1194 col->getSortKey(test3, sortkey3, 64); 1195 1196 tempkey = key1.getByteArray(keylength); 1197 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1198 "Test1 string should have the same collation key and sort key"); 1199 tempkey = key2.getByteArray(keylength); 1200 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1201 "Test2 string should have the same collation key and sort key"); 1202 tempkey = key3.getByteArray(keylength); 1203 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1204 "Test3 string should have the same collation key and sort key"); 
1205 1206 col->getSortKey(test1, 5, sortkey1, 64); 1207 col->getSortKey(test2, 5, sortkey2, 64); 1208 col->getSortKey(test3, 5, sortkey3, 64); 1209 1210 tempkey = key1.getByteArray(keylength); 1211 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1212 "Test1 string should have the same collation key and sort key"); 1213 tempkey = key2.getByteArray(keylength); 1214 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1215 "Test2 string should have the same collation key and sort key"); 1216 tempkey = key3.getByteArray(keylength); 1217 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1218 "Test3 string should have the same collation key and sort key"); 1219 1220 col->getSortKey(strtest1, sortkey1, 64); 1221 col->getSortKey(strtest2, sortkey2, 64); 1222 col->getSortKey(strtest3, sortkey3, 64); 1223 1224 tempkey = key1.getByteArray(keylength); 1225 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1226 "Test1 string should have the same collation key and sort key"); 1227 tempkey = key2.getByteArray(keylength); 1228 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1229 "Test2 string should have the same collation key and sort key"); 1230 tempkey = key3.getByteArray(keylength); 1231 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1232 "Test3 string should have the same collation key and sort key"); 1233 1234 logln("testing sortkey ends..."); 1235 delete col; 1236 } 1237 1238 void CollationAPITest::TestSortKeyOverflow() { 1239 IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()"); 1240 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode)); 1241 if (errorCode.errDataIfFailureAndReset("Collator::createInstance(English) failed")) { 1242 return; 1243 } 1244 col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode); 1245 UChar i_and_phi[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi. 
1246 // The sort key should be 6 bytes: 1247 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator, 1248 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator. 1249 uint8_t sortKey[12]; 1250 int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey)); 1251 uint8_t sortKey2[12]; 1252 for (int32_t capacity = 0; capacity < length; ++capacity) { 1253 uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2)); 1254 int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity); 1255 if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) { 1256 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity); 1257 } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) { 1258 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity); 1259 } 1260 } 1261 1262 // Now try to break getCollationKey(). 1263 // Internally, it always starts with a large stack buffer. 1264 // Since we cannot control the initial capacity, we throw an increasing number 1265 // of characters at it, with the problematic part at the end. 1266 const int32_t longCapacity = 2000; 1267 // Each 'a' in the prefix should result in one primary sort key byte. 1268 // For i_and_phi we expect 6 bytes, then the NUL terminator. 
1269 const int32_t maxPrefixLength = longCapacity - 6 - 1; 1270 LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]); 1271 UnicodeString s(FALSE, i_and_phi, 2); 1272 for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) { 1273 length = col->getSortKey(s, longSortKey.getAlias(), longCapacity); 1274 CollationKey collKey; 1275 col->getCollationKey(s, collKey, errorCode); 1276 int32_t collKeyLength; 1277 const uint8_t *collSortKey = collKey.getByteArray(collKeyLength); 1278 if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) { 1279 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength); 1280 } 1281 1282 // Insert an 'a' to match ++prefixLength. 1283 s.insert(prefixLength, (UChar)0x61); 1284 } 1285 } 1286 1287 void CollationAPITest::TestMaxExpansion() 1288 { 1289 UErrorCode status = U_ZERO_ERROR; 1290 UChar ch = 0; 1291 UChar32 unassigned = 0xEFFFD; 1292 uint32_t sorder = 0; 1293 uint32_t temporder = 0; 1294 1295 UnicodeString rule("&a < ab < c/aba < d < z < ch"); 1296 RuleBasedCollator coll(rule, status); 1297 if(U_FAILURE(status)) { 1298 errcheckln(status, "Collator creation failed with error %s", u_errorName(status)); 1299 return; 1300 } 1301 UnicodeString str(ch); 1302 CollationElementIterator *iter = 1303 coll.createCollationElementIterator(str); 1304 1305 while (ch < 0xFFFF && U_SUCCESS(status)) { 1306 int count = 1; 1307 uint32_t order; 1308 int32_t size = 0; 1309 1310 ch ++; 1311 1312 str.setCharAt(0, ch); 1313 iter->setText(str, status); 1314 order = iter->previous(status); 1315 1316 /* thai management */ 1317 if (order == 0) 1318 order = iter->previous(status); 1319 1320 while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) { 1321 count ++; 1322 } 1323 1324 size = coll.getMaxExpansion(order); 1325 if (U_FAILURE(status) || size < count) { 1326 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d", 
1327 ch, size, count); 1328 } 1329 } 1330 1331 /* testing for exact max expansion */ 1332 int32_t size; 1333 ch = 0; 1334 while (ch < 0x61) { 1335 uint32_t order; 1336 str.setCharAt(0, ch); 1337 iter->setText(str, status); 1338 order = iter->previous(status); 1339 size = coll.getMaxExpansion(order); 1340 if (U_FAILURE(status) || size != 1) { 1341 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d", 1342 ch, size, 1); 1343 } 1344 ch ++; 1345 } 1346 1347 ch = 0x63; 1348 str.setTo(ch); 1349 iter->setText(str, status); 1350 temporder = iter->previous(status); 1351 size = coll.getMaxExpansion(temporder); 1352 if (U_FAILURE(status) || size != 3) { 1353 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d", 1354 ch, temporder, size, 3); 1355 } 1356 1357 ch = 0x64; 1358 str.setTo(ch); 1359 iter->setText(str, status); 1360 temporder = iter->previous(status); 1361 size = coll.getMaxExpansion(temporder); 1362 if (U_FAILURE(status) || size != 1) { 1363 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d", 1364 ch, temporder, size, 1); 1365 } 1366 1367 str.setTo(unassigned); 1368 iter->setText(str, status); 1369 sorder = iter->previous(status); 1370 size = coll.getMaxExpansion(sorder); 1371 if (U_FAILURE(status) || size != 2) { 1372 errln("Failure at supplementary codepoints, maximum expansion count %d < %d", 1373 size, 2); 1374 } 1375 1376 /* testing jamo */ 1377 ch = 0x1165; 1378 str.setTo(ch); 1379 iter->setText(str, status); 1380 temporder = iter->previous(status); 1381 size = coll.getMaxExpansion(temporder); 1382 if (U_FAILURE(status) || size > 3) { 1383 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d", 1384 ch, size, 3); 1385 } 1386 1387 delete iter; 1388 1389 /* testing special jamo &a<\u1160 */ 1390 rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071"); 1391 1392 RuleBasedCollator jamocoll(rule, status); 1393 iter = 
jamocoll.createCollationElementIterator(str); 1394 temporder = iter->previous(status); 1395 size = iter->getMaxExpansion(temporder); 1396 if (U_FAILURE(status) || size != 6) { 1397 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d", 1398 ch, size, 5); 1399 } 1400 1401 delete iter; 1402 } 1403 1404 void CollationAPITest::TestDisplayName() 1405 { 1406 UErrorCode error = U_ZERO_ERROR; 1407 Collator *coll = Collator::createInstance("en_US", error); 1408 if (U_FAILURE(error)) { 1409 errcheckln(error, "Failure creating english collator - %s", u_errorName(error)); 1410 return; 1411 } 1412 UnicodeString name; 1413 UnicodeString result; 1414 coll->getDisplayName(Locale::getCanadaFrench(), result); 1415 Locale::getCanadaFrench().getDisplayName(name); 1416 if (result.compare(name)) { 1417 errln("Failure getting the correct name for locale en_US"); 1418 } 1419 1420 coll->getDisplayName(Locale::getSimplifiedChinese(), result); 1421 Locale::getSimplifiedChinese().getDisplayName(name); 1422 if (result.compare(name)) { 1423 errln("Failure getting the correct name for locale zh_SG"); 1424 } 1425 delete coll; 1426 } 1427 1428 void CollationAPITest::TestAttribute() 1429 { 1430 UErrorCode error = U_ZERO_ERROR; 1431 Collator *coll = Collator::createInstance(error); 1432 1433 if (U_FAILURE(error)) { 1434 errcheckln(error, "Creation of default collator failed - %s", u_errorName(error)); 1435 return; 1436 } 1437 1438 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error); 1439 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF || 1440 U_FAILURE(error)) { 1441 errln("Setting and retrieving of the french collation failed"); 1442 } 1443 1444 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error); 1445 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON || 1446 U_FAILURE(error)) { 1447 errln("Setting and retrieving of the french collation failed"); 1448 } 1449 1450 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error); 1451 if 
(coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED || 1452 U_FAILURE(error)) { 1453 errln("Setting and retrieving of the alternate handling failed"); 1454 } 1455 1456 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error); 1457 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE || 1458 U_FAILURE(error)) { 1459 errln("Setting and retrieving of the alternate handling failed"); 1460 } 1461 1462 coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error); 1463 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST || 1464 U_FAILURE(error)) { 1465 errln("Setting and retrieving of the case first attribute failed"); 1466 } 1467 1468 coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error); 1469 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST || 1470 U_FAILURE(error)) { 1471 errln("Setting and retrieving of the case first attribute failed"); 1472 } 1473 1474 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error); 1475 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON || 1476 U_FAILURE(error)) { 1477 errln("Setting and retrieving of the case level attribute failed"); 1478 } 1479 1480 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error); 1481 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF || 1482 U_FAILURE(error)) { 1483 errln("Setting and retrieving of the case level attribute failed"); 1484 } 1485 1486 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error); 1487 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON || 1488 U_FAILURE(error)) { 1489 errln("Setting and retrieving of the normalization on/off attribute failed"); 1490 } 1491 1492 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error); 1493 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF || 1494 U_FAILURE(error)) { 1495 errln("Setting and retrieving of the normalization on/off attribute failed"); 1496 } 1497 1498 coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, 
error); 1499 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY || 1500 U_FAILURE(error)) { 1501 errln("Setting and retrieving of the collation strength failed"); 1502 } 1503 1504 coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error); 1505 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY || 1506 U_FAILURE(error)) { 1507 errln("Setting and retrieving of the collation strength failed"); 1508 } 1509 1510 coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error); 1511 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY || 1512 U_FAILURE(error)) { 1513 errln("Setting and retrieving of the collation strength failed"); 1514 } 1515 1516 coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error); 1517 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY || 1518 U_FAILURE(error)) { 1519 errln("Setting and retrieving of the collation strength failed"); 1520 } 1521 1522 coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error); 1523 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL || 1524 U_FAILURE(error)) { 1525 errln("Setting and retrieving of the collation strength failed"); 1526 } 1527 1528 delete coll; 1529 } 1530 1531 void CollationAPITest::TestVariableTopSetting() { 1532 UErrorCode status = U_ZERO_ERROR; 1533 1534 UChar vt[256] = { 0 }; 1535 1536 // Use the root collator, not the default collator. 1537 // This test fails with en_US_POSIX which tailors the dollar sign after 'A'. 1538 Collator *coll = Collator::createInstance(Locale::getRoot(), status); 1539 if(U_FAILURE(status)) { 1540 delete coll; 1541 errcheckln(status, "Collator creation failed with error %s", u_errorName(status)); 1542 return; 1543 } 1544 1545 uint32_t oldVarTop = coll->getVariableTop(status); 1546 1547 // ICU 53+: The character must be in a supported reordering group, 1548 // and the variable top is pinned to the end of that group. 
1549 vt[0] = 0x0041; 1550 1551 (void)coll->setVariableTop(vt, 1, status); 1552 if(status != U_ILLEGAL_ARGUMENT_ERROR) { 1553 errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status)); 1554 } 1555 1556 status = U_ZERO_ERROR; 1557 vt[0] = 0x24; // dollar sign (currency symbol) 1558 uint32_t newVarTop = coll->setVariableTop(vt, 1, status); 1559 if(U_FAILURE(status)) { 1560 errln("setVariableTop(dollar sign) failed: %s", u_errorName(status)); 1561 return; 1562 } 1563 if(newVarTop != coll->getVariableTop(status)) { 1564 errln("setVariableTop(dollar sign) != following getVariableTop()"); 1565 } 1566 1567 UnicodeString dollar((UChar)0x24); 1568 UnicodeString euro((UChar)0x20AC); 1569 uint32_t newVarTop2 = coll->setVariableTop(euro, status); 1570 assertEquals("setVariableTop(Euro sign) == following getVariableTop()", 1571 (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status)); 1572 assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)", 1573 (int64_t)newVarTop2, (int64_t)newVarTop); 1574 1575 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 1576 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), dollar)); 1577 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), euro)); 1578 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(dollar, UnicodeString((UChar)0x30))); 1579 1580 coll->setVariableTop(oldVarTop, status); 1581 1582 uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status); 1583 1584 if(newVarTop != newerVarTop) { 1585 errln("Didn't set vartop properly from UnicodeString!\n"); 1586 } 1587 1588 delete coll; 1589 1590 } 1591 1592 void CollationAPITest::TestMaxVariable() { 1593 UErrorCode errorCode = U_ZERO_ERROR; 1594 LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode)); 1595 if(U_FAILURE(errorCode)) { 1596 
errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode)); 1597 return; 1598 } 1599 1600 (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode); 1601 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 1602 errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode)); 1603 } 1604 1605 errorCode = U_ZERO_ERROR; 1606 (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode); 1607 1608 if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) { 1609 errln("setMaxVariable(currency) != following getMaxVariable()"); 1610 } 1611 1612 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode); 1613 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x24))); 1614 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC))); 1615 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30))); 1616 } 1617 1618 void CollationAPITest::TestGetLocale() { 1619 UErrorCode status = U_ZERO_ERROR; 1620 const char *rules = "&a<x<y<z"; 1621 UChar rlz[256] = {0}; 1622 1623 Collator *coll = Collator::createInstance("root", status); 1624 if(U_FAILURE(status)) { 1625 dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status)); 1626 return; 1627 } 1628 Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1629 if(locale != Locale::getRoot()) { 1630 errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); " 1631 "getLocale().getName() = \"%s\"", 1632 locale.getName()); 1633 } 1634 delete coll; 1635 1636 coll = Collator::createInstance("", status); 1637 if(U_FAILURE(status)) { 1638 dataerrln("Failed to open collator for \"\" with %s", u_errorName(status)); 1639 return; 1640 } 1641 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1642 if(locale != Locale::getRoot()) { 1643 
errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); " 1644 "getLocale().getName() = \"%s\"", 1645 locale.getName()); 1646 } 1647 delete coll; 1648 1649 int32_t i = 0; 1650 1651 static const struct { 1652 const char* requestedLocale; 1653 const char* validLocale; 1654 const char* actualLocale; 1655 } testStruct[] = { 1656 // Note: Locale::getRoot().getName() == "" not "root". 1657 { "de_DE", "de", "" }, 1658 { "sr_RS", "sr_Cyrl_RS", "sr" }, 1659 { "en_US_CALIFORNIA", "en_US", "" }, 1660 { "fr_FR_NONEXISTANT", "fr", "" }, 1661 // pinyin is the default, therefore suppressed. 1662 { "zh_CN", "zh_Hans_CN", "zh" }, 1663 // zh_Hant has default=stroke but the data is in zh. 1664 { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" }, 1665 { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" }, 1666 { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" }, 1667 // yue/yue_Hant aliased to zh_Hant, yue_Hans aliased to zh_Hans. 1668 { "yue", "zh_Hant", "zh@collation=stroke" }, 1669 { "yue_HK", "zh_Hant", "zh@collation=stroke" }, 1670 { "yue_Hant", "zh_Hant", "zh@collation=stroke" }, 1671 { "yue_Hant_HK", "zh_Hant", "zh@collation=stroke" }, 1672 { "yue@collation=pinyin", "zh_Hant@collation=pinyin", "zh" }, 1673 { "yue_HK@collation=pinyin", "zh_Hant@collation=pinyin", "zh" }, 1674 { "yue_CN", "zh_Hans", "zh" }, 1675 { "yue_Hans", "zh_Hans", "zh" }, 1676 { "yue_Hans_CN", "zh_Hans", "zh" }, 1677 { "yue_Hans@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" }, 1678 { "yue_CN@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" } 1679 }; 1680 1681 u_unescape(rules, rlz, 256); 1682 1683 /* test opening collators for different locales */ 1684 for(i = 0; i<UPRV_LENGTHOF(testStruct); i++) { 1685 status = U_ZERO_ERROR; 1686 coll = Collator::createInstance(testStruct[i].requestedLocale, status); 1687 if(U_FAILURE(status)) { 1688 errln("Failed to open collator for %s with %s", 
testStruct[i].requestedLocale, u_errorName(status)); 1689 delete coll; 1690 continue; 1691 } 1692 // The requested locale may be the same as the valid locale, 1693 // or may not be supported at all. See ticket #10477. 1694 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status); 1695 if(U_SUCCESS(status) && 1696 locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) { 1697 errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s", 1698 testStruct[i].requestedLocale, 1699 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName()); 1700 } 1701 status = U_ZERO_ERROR; 1702 locale = coll->getLocale(ULOC_VALID_LOCALE, status); 1703 if(locale != testStruct[i].validLocale) { 1704 errln("[Coll %s]: Error in valid locale, expected %s, got %s", 1705 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName()); 1706 } 1707 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1708 if(locale != testStruct[i].actualLocale) { 1709 errln("[Coll %s]: Error in actual locale, expected %s, got %s", 1710 testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName()); 1711 } 1712 // If we open a collator for the actual locale, we should get an equivalent one again. 
1713 LocalPointer<Collator> coll2(Collator::createInstance(locale, status)); 1714 if(U_FAILURE(status)) { 1715 errln("Failed to open collator for actual locale \"%s\" with %s", 1716 locale.getName(), u_errorName(status)); 1717 } else { 1718 Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status); 1719 if(actual2 != locale) { 1720 errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"", 1721 locale.getName(), actual2.getName()); 1722 } 1723 if(*coll2 != *coll) { 1724 errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName()); 1725 } 1726 } 1727 delete coll; 1728 } 1729 1730 /* completely non-existent locale for collator should get a root collator */ 1731 { 1732 LocalPointer<Collator> coll(Collator::createInstance("blahaha", status)); 1733 if(U_FAILURE(status)) { 1734 errln("Failed to open collator with %s", u_errorName(status)); 1735 return; 1736 } 1737 Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status); 1738 const char *name = valid.getName(); 1739 if(*name != 0 && strcmp(name, "root") != 0) { 1740 errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name); 1741 } 1742 Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1743 name = actual.getName(); 1744 if(*name != 0 && strcmp(name, "root") != 0) { 1745 errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name); 1746 } 1747 } 1748 1749 1750 1751 /* collator instantiated from rules should have all three locales NULL */ 1752 coll = new RuleBasedCollator(rlz, status); 1753 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status); 1754 if(U_SUCCESS(status) && !locale.isBogus()) { 1755 errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName()); 1756 } 1757 status = U_ZERO_ERROR; 1758 locale = coll->getLocale(ULOC_VALID_LOCALE, status); 1759 if(!locale.isBogus()) { 1760 errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName()); 1761 } 
1762 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1763 if(!locale.isBogus()) { 1764 errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName()); 1765 } 1766 delete coll; 1767 } 1768 1769 struct teststruct { 1770 const char *original; 1771 uint8_t key[256]; 1772 }; 1773 1774 1775 1776 U_CDECL_BEGIN 1777 static int U_CALLCONV 1778 compare_teststruct(const void *string1, const void *string2) { 1779 return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key)); 1780 } 1781 U_CDECL_END 1782 1783 1784 void CollationAPITest::TestBounds(void) { 1785 UErrorCode status = U_ZERO_ERROR; 1786 1787 Collator *coll = Collator::createInstance(Locale("sh"), status); 1788 if(U_FAILURE(status)) { 1789 delete coll; 1790 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 1791 return; 1792 } 1793 1794 uint8_t sortkey[512], lower[512], upper[512]; 1795 UChar buffer[512]; 1796 1797 static const char * const test[] = { 1798 "John Smith", 1799 "JOHN SMITH", 1800 "john SMITH", 1801 "j\\u00F6hn sm\\u00EFth", 1802 "J\\u00F6hn Sm\\u00EFth", 1803 "J\\u00D6HN SM\\u00CFTH", 1804 "john smithsonian", 1805 "John Smithsonian" 1806 }; 1807 1808 struct teststruct tests[] = { 1809 {"\\u010CAKI MIHALJ", {0}}, 1810 {"\\u010CAKI MIHALJ", {0}}, 1811 {"\\u010CAKI PIRO\\u0160KA", {0}}, 1812 {"\\u010CABAI ANDRIJA", {0}}, 1813 {"\\u010CABAI LAJO\\u0160", {0}}, 1814 {"\\u010CABAI MARIJA", {0}}, 1815 {"\\u010CABAI STEVAN", {0}}, 1816 {"\\u010CABAI STEVAN", {0}}, 1817 {"\\u010CABARKAPA BRANKO", {0}}, 1818 {"\\u010CABARKAPA MILENKO", {0}}, 1819 {"\\u010CABARKAPA MIROSLAV", {0}}, 1820 {"\\u010CABARKAPA SIMO", {0}}, 1821 {"\\u010CABARKAPA STANKO", {0}}, 1822 {"\\u010CABARKAPA TAMARA", {0}}, 1823 {"\\u010CABARKAPA TOMA\\u0160", {0}}, 1824 {"\\u010CABDARI\\u0106 NIKOLA", {0}}, 1825 {"\\u010CABDARI\\u0106 ZORICA", {0}}, 1826 {"\\u010CABI NANDOR", {0}}, 1827 {"\\u010CABOVI\\u0106 MILAN", {0}}, 1828 
{"\\u010CABRADI AGNEZIJA", {0}}, 1829 {"\\u010CABRADI IVAN", {0}}, 1830 {"\\u010CABRADI JELENA", {0}}, 1831 {"\\u010CABRADI LJUBICA", {0}}, 1832 {"\\u010CABRADI STEVAN", {0}}, 1833 {"\\u010CABRDA MARTIN", {0}}, 1834 {"\\u010CABRILO BOGDAN", {0}}, 1835 {"\\u010CABRILO BRANISLAV", {0}}, 1836 {"\\u010CABRILO LAZAR", {0}}, 1837 {"\\u010CABRILO LJUBICA", {0}}, 1838 {"\\u010CABRILO SPASOJA", {0}}, 1839 {"\\u010CADE\\u0160 ZDENKA", {0}}, 1840 {"\\u010CADESKI BLAGOJE", {0}}, 1841 {"\\u010CADOVSKI VLADIMIR", {0}}, 1842 {"\\u010CAGLJEVI\\u0106 TOMA", {0}}, 1843 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}}, 1844 {"\\u010CAJA VANKA", {0}}, 1845 {"\\u010CAJI\\u0106 BOGOLJUB", {0}}, 1846 {"\\u010CAJI\\u0106 BORISLAV", {0}}, 1847 {"\\u010CAJI\\u0106 RADOSLAV", {0}}, 1848 {"\\u010CAK\\u0160IRAN MILADIN", {0}}, 1849 {"\\u010CAKAN EUGEN", {0}}, 1850 {"\\u010CAKAN EVGENIJE", {0}}, 1851 {"\\u010CAKAN IVAN", {0}}, 1852 {"\\u010CAKAN JULIJAN", {0}}, 1853 {"\\u010CAKAN MIHAJLO", {0}}, 1854 {"\\u010CAKAN STEVAN", {0}}, 1855 {"\\u010CAKAN VLADIMIR", {0}}, 1856 {"\\u010CAKAN VLADIMIR", {0}}, 1857 {"\\u010CAKAN VLADIMIR", {0}}, 1858 {"\\u010CAKARA ANA", {0}}, 1859 {"\\u010CAKAREVI\\u0106 MOMIR", {0}}, 1860 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}}, 1861 {"\\u010CAKI \\u0160ANDOR", {0}}, 1862 {"\\u010CAKI AMALIJA", {0}}, 1863 {"\\u010CAKI ANDRA\\u0160", {0}}, 1864 {"\\u010CAKI LADISLAV", {0}}, 1865 {"\\u010CAKI LAJO\\u0160", {0}}, 1866 {"\\u010CAKI LASLO", {0}} 1867 }; 1868 1869 1870 1871 int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0; 1872 int32_t arraySize = UPRV_LENGTHOF(tests); 1873 1874 (void)lowerSize; // Suppress unused variable warnings. 
1875 (void)upperSize; 1876 1877 for(i = 0; i<arraySize; i++) { 1878 buffSize = u_unescape(tests[i].original, buffer, 512); 1879 skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512); 1880 } 1881 1882 qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct); 1883 1884 for(i = 0; i < arraySize-1; i++) { 1885 for(j = i+1; j < arraySize; j++) { 1886 lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status); 1887 upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status); 1888 for(k = i; k <= j; k++) { 1889 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) { 1890 errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original); 1891 } 1892 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) { 1893 errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original); 1894 } 1895 } 1896 } 1897 } 1898 1899 1900 for(i = 0; i<UPRV_LENGTHOF(test); i++) { 1901 buffSize = u_unescape(test[i], buffer, 512); 1902 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512); 1903 lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status); 1904 upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status); 1905 for(j = i+1; j<UPRV_LENGTHOF(test); j++) { 1906 buffSize = u_unescape(test[j], buffer, 512); 1907 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512); 1908 if(strcmp((const char *)lower, (const char *)sortkey) > 0) { 1909 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]); 1910 } 1911 if(strcmp((const char *)upper, (const char *)sortkey) <= 0) { 1912 errln("Problem with upper! 
i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]); 1913 } 1914 } 1915 } 1916 delete coll; 1917 } 1918 1919 1920 void CollationAPITest::TestGetTailoredSet() 1921 { 1922 struct { 1923 const char *rules; 1924 const char *tests[20]; 1925 int32_t testsize; 1926 } setTest[] = { 1927 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3}, 1928 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4} 1929 }; 1930 1931 int32_t i = 0, j = 0; 1932 UErrorCode status = U_ZERO_ERROR; 1933 1934 UnicodeString buff; 1935 UnicodeSet *set = NULL; 1936 1937 for(i = 0; i < UPRV_LENGTHOF(setTest); i++) { 1938 buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape(); 1939 RuleBasedCollator coll(buff, status); 1940 if(U_SUCCESS(status)) { 1941 set = coll.getTailoredSet(status); 1942 if(set->size() < setTest[i].testsize) { 1943 errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize); 1944 } 1945 for(j = 0; j < setTest[i].testsize; j++) { 1946 buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape(); 1947 if(!set->contains(buff)) { 1948 errln("Tailored set doesn't contain %s... 
It should", setTest[i].tests[j]); 1949 } 1950 } 1951 delete set; 1952 } else { 1953 errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status)); 1954 } 1955 } 1956 } 1957 1958 void CollationAPITest::TestUClassID() 1959 { 1960 char id = *((char *)RuleBasedCollator::getStaticClassID()); 1961 if (id != 0) { 1962 errln("Static class id for RuleBasedCollator should be 0"); 1963 } 1964 UErrorCode status = U_ZERO_ERROR; 1965 RuleBasedCollator *coll 1966 = (RuleBasedCollator *)Collator::createInstance(status); 1967 if(U_FAILURE(status)) { 1968 delete coll; 1969 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 1970 return; 1971 } 1972 id = *((char *)coll->getDynamicClassID()); 1973 if (id != 0) { 1974 errln("Dynamic class id for RuleBasedCollator should be 0"); 1975 } 1976 id = *((char *)CollationKey::getStaticClassID()); 1977 if (id != 0) { 1978 errln("Static class id for CollationKey should be 0"); 1979 } 1980 CollationKey *key = new CollationKey(); 1981 id = *((char *)key->getDynamicClassID()); 1982 if (id != 0) { 1983 errln("Dynamic class id for CollationKey should be 0"); 1984 } 1985 id = *((char *)CollationElementIterator::getStaticClassID()); 1986 if (id != 0) { 1987 errln("Static class id for CollationElementIterator should be 0"); 1988 } 1989 UnicodeString str("testing"); 1990 CollationElementIterator *iter = coll->createCollationElementIterator(str); 1991 id = *((char *)iter->getDynamicClassID()); 1992 if (id != 0) { 1993 errln("Dynamic class id for CollationElementIterator should be 0"); 1994 } 1995 delete key; 1996 delete iter; 1997 delete coll; 1998 } 1999 2000 class TestCollator : public Collator 2001 { 2002 public: 2003 virtual Collator* clone(void) const; 2004 2005 using Collator::compare; 2006 2007 virtual UCollationResult compare(const UnicodeString& source, 2008 const UnicodeString& target, 2009 UErrorCode& status) const; 2010 virtual UCollationResult compare(const UnicodeString& 
source, 2011 const UnicodeString& target, 2012 int32_t length, 2013 UErrorCode& status) const; 2014 virtual UCollationResult compare(const UChar* source, 2015 int32_t sourceLength, 2016 const UChar* target, 2017 int32_t targetLength, 2018 UErrorCode& status) const; 2019 virtual CollationKey& getCollationKey(const UnicodeString& source, 2020 CollationKey& key, 2021 UErrorCode& status) const; 2022 virtual CollationKey& getCollationKey(const UChar*source, 2023 int32_t sourceLength, 2024 CollationKey& key, 2025 UErrorCode& status) const; 2026 virtual int32_t hashCode(void) const; 2027 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; 2028 virtual ECollationStrength getStrength(void) const; 2029 virtual void setStrength(ECollationStrength newStrength); 2030 virtual UClassID getDynamicClassID(void) const; 2031 virtual void getVersion(UVersionInfo info) const; 2032 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, 2033 UErrorCode &status); 2034 virtual UColAttributeValue getAttribute(UColAttribute attr, 2035 UErrorCode &status) const; 2036 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, 2037 UErrorCode &status); 2038 virtual uint32_t setVariableTop(const UnicodeString &varTop, 2039 UErrorCode &status); 2040 virtual void setVariableTop(uint32_t varTop, UErrorCode &status); 2041 virtual uint32_t getVariableTop(UErrorCode &status) const; 2042 virtual int32_t getSortKey(const UnicodeString& source, 2043 uint8_t* result, 2044 int32_t resultLength) const; 2045 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, 2046 uint8_t*result, int32_t resultLength) const; 2047 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; 2048 virtual UBool operator==(const Collator& other) const; 2049 // Collator::operator!= calls !Collator::operator== which works for all subclasses. 
2050 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); 2051 TestCollator() : Collator() {}; 2052 TestCollator(UCollationStrength collationStrength, 2053 UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {}; 2054 }; 2055 2056 inline UBool TestCollator::operator==(const Collator& other) const { 2057 // TestCollator has no fields, so we test for identity. 2058 return this == &other; 2059 2060 // Normally, subclasses should do something like the following: 2061 // if (this == &other) { return TRUE; } 2062 // if (!Collator::operator==(other)) { return FALSE; } // not the same class 2063 // 2064 // const TestCollator &o = (const TestCollator&)other; 2065 // (compare this vs. o's subclass fields) 2066 } 2067 2068 Collator* TestCollator::clone() const 2069 { 2070 return new TestCollator(); 2071 } 2072 2073 UCollationResult TestCollator::compare(const UnicodeString& source, 2074 const UnicodeString& target, 2075 UErrorCode& status) const 2076 { 2077 if(U_SUCCESS(status)) { 2078 return UCollationResult(source.compare(target)); 2079 } else { 2080 return UCOL_EQUAL; 2081 } 2082 } 2083 2084 UCollationResult TestCollator::compare(const UnicodeString& source, 2085 const UnicodeString& target, 2086 int32_t length, 2087 UErrorCode& status) const 2088 { 2089 if(U_SUCCESS(status)) { 2090 return UCollationResult(source.compare(0, length, target)); 2091 } else { 2092 return UCOL_EQUAL; 2093 } 2094 } 2095 2096 UCollationResult TestCollator::compare(const UChar* source, 2097 int32_t sourceLength, 2098 const UChar* target, 2099 int32_t targetLength, 2100 UErrorCode& status) const 2101 { 2102 UnicodeString s(source, sourceLength); 2103 UnicodeString t(target, targetLength); 2104 return compare(s, t, status); 2105 } 2106 2107 CollationKey& TestCollator::getCollationKey(const UnicodeString& source, 2108 CollationKey& key, 2109 UErrorCode& status) const 2110 { 2111 char temp[100]; 2112 int length 
= 100; 2113 length = source.extract(temp, length, NULL, status); 2114 temp[length] = 0; 2115 CollationKey tempkey((uint8_t*)temp, length); 2116 key = tempkey; 2117 return key; 2118 } 2119 2120 CollationKey& TestCollator::getCollationKey(const UChar*source, 2121 int32_t sourceLength, 2122 CollationKey& key, 2123 UErrorCode& status) const 2124 { 2125 //s tack allocation used since collationkey does not keep the unicodestring 2126 UnicodeString str(source, sourceLength); 2127 return getCollationKey(str, key, status); 2128 } 2129 2130 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result, 2131 int32_t resultLength) const 2132 { 2133 UErrorCode status = U_ZERO_ERROR; 2134 int32_t length = source.extract((char *)result, resultLength, NULL, 2135 status); 2136 result[length] = 0; 2137 return length; 2138 } 2139 2140 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength, 2141 uint8_t*result, int32_t resultLength) const 2142 { 2143 UnicodeString str(source, sourceLength); 2144 return getSortKey(str, result, resultLength); 2145 } 2146 2147 int32_t TestCollator::hashCode() const 2148 { 2149 return 0; 2150 } 2151 2152 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const 2153 { 2154 // api not used, this is to make the compiler happy 2155 if (U_FAILURE(status)) { 2156 (void)type; 2157 } 2158 return NULL; 2159 } 2160 2161 Collator::ECollationStrength TestCollator::getStrength() const 2162 { 2163 return TERTIARY; 2164 } 2165 2166 void TestCollator::setStrength(Collator::ECollationStrength newStrength) 2167 { 2168 // api not used, this is to make the compiler happy 2169 (void)newStrength; 2170 } 2171 2172 UClassID TestCollator::getDynamicClassID(void) const 2173 { 2174 return 0; 2175 } 2176 2177 void TestCollator::getVersion(UVersionInfo info) const 2178 { 2179 // api not used, this is to make the compiler happy 2180 memset(info, 0, U_MAX_VERSION_LENGTH); 2181 } 2182 2183 void 
TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/, 2184 UErrorCode & /*status*/) 2185 { 2186 } 2187 2188 UColAttributeValue TestCollator::getAttribute(UColAttribute attr, 2189 UErrorCode &status) const 2190 { 2191 // api not used, this is to make the compiler happy 2192 if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) { 2193 return UCOL_OFF; 2194 } 2195 return UCOL_DEFAULT; 2196 } 2197 2198 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len, 2199 UErrorCode &status) 2200 { 2201 // api not used, this is to make the compiler happy 2202 if (U_SUCCESS(status) && (varTop == 0 || len < -1)) { 2203 status = U_ILLEGAL_ARGUMENT_ERROR; 2204 } 2205 return 0; 2206 } 2207 2208 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop, 2209 UErrorCode &status) 2210 { 2211 // api not used, this is to make the compiler happy 2212 if (U_SUCCESS(status) && varTop.length() == 0) { 2213 status = U_ILLEGAL_ARGUMENT_ERROR; 2214 } 2215 return 0; 2216 } 2217 2218 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status) 2219 { 2220 // api not used, this is to make the compiler happy 2221 if (U_SUCCESS(status) && varTop == 0) { 2222 status = U_ILLEGAL_ARGUMENT_ERROR; 2223 } 2224 } 2225 2226 uint32_t TestCollator::getVariableTop(UErrorCode &status) const 2227 { 2228 2229 // api not used, this is to make the compiler happy 2230 if (U_SUCCESS(status)) { 2231 return 0; 2232 } 2233 return (uint32_t)(0xFFFFFFFFu); 2234 } 2235 2236 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const 2237 { 2238 return Collator::getTailoredSet(status); 2239 } 2240 2241 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) 2242 { 2243 Collator::setLocales(requestedLocale, validLocale, actualLocale); 2244 } 2245 2246 2247 void CollationAPITest::TestSubclass() 2248 { 2249 TestCollator col1; 2250 TestCollator col2; 2251 doAssert(col1 != col2, "2 instances of 
TestCollator should be different"); 2252 if (col1.hashCode() != col2.hashCode()) { 2253 errln("Every TestCollator has the same hashcode"); 2254 } 2255 UnicodeString abc("abc", 3); 2256 UnicodeString bcd("bcd", 3); 2257 if (col1.compare(abc, bcd) != abc.compare(bcd)) { 2258 errln("TestCollator compare should be the same as the default " 2259 "string comparison"); 2260 } 2261 CollationKey key; 2262 UErrorCode status = U_ZERO_ERROR; 2263 col1.getCollationKey(abc, key, status); 2264 int32_t length = 0; 2265 const char* bytes = (const char *)key.getByteArray(length); 2266 UnicodeString keyarray(bytes, length, NULL, status); 2267 if (abc != keyarray) { 2268 errln("TestCollator collationkey API is returning wrong values"); 2269 } 2270 2271 UnicodeSet expectedset(0, 0x10FFFF); 2272 UnicodeSet *defaultset = col1.getTailoredSet(status); 2273 if (!defaultset->containsAll(expectedset) 2274 || !expectedset.containsAll(*defaultset)) { 2275 errln("Error: expected default tailoring to be 0 to 0x10ffff"); 2276 } 2277 delete defaultset; 2278 2279 // use base class implementation 2280 Locale loc1 = Locale::getGermany(); 2281 Locale loc2 = Locale::getFrance(); 2282 col1.setLocales(loc1, loc2, loc2); // default implementation has no effect 2283 2284 UnicodeString displayName; 2285 col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale 2286 2287 TestCollator col3(UCOL_TERTIARY, UNORM_NONE); 2288 UnicodeString a("a"); 2289 UnicodeString b("b"); 2290 Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b)); 2291 if(col1.compare(a, b) != result) { 2292 errln("Collator doesn't give default result"); 2293 } 2294 if(col1.compare(a, b, 1) != result) { 2295 errln("Collator doesn't give default result"); 2296 } 2297 if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) { 2298 errln("Collator doesn't give default result"); 2299 } 2300 } 2301 2302 void CollationAPITest::TestNULLCharTailoring() 2303 { 2304 UErrorCode status = 
U_ZERO_ERROR; 2305 UChar buf[256] = {0}; 2306 int32_t len = u_unescape("&a < '\\u0000'", buf, 256); 2307 UnicodeString first((UChar)0x0061); 2308 UnicodeString second((UChar)0); 2309 RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status); 2310 if(U_FAILURE(status)) { 2311 delete coll; 2312 errcheckln(status, "Failed to open collator - %s", u_errorName(status)); 2313 return; 2314 } 2315 UCollationResult res = coll->compare(first, second, status); 2316 if(res != UCOL_LESS) { 2317 errln("a should be less then NULL after tailoring"); 2318 } 2319 delete coll; 2320 } 2321 2322 void CollationAPITest::TestClone() { 2323 logln("\ninit c0"); 2324 UErrorCode status = U_ZERO_ERROR; 2325 RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status); 2326 2327 if (U_FAILURE(status)) { 2328 errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status)); 2329 return; 2330 } 2331 2332 c0->setStrength(Collator::TERTIARY); 2333 dump("c0", c0, status); 2334 2335 logln("\ninit c1"); 2336 RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status); 2337 c1->setStrength(Collator::TERTIARY); 2338 UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status); 2339 if(val == UCOL_LOWER_FIRST){ 2340 c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); 2341 }else{ 2342 c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); 2343 } 2344 dump("c0", c0, status); 2345 dump("c1", c1, status); 2346 2347 logln("\ninit c2"); 2348 RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone(); 2349 val = c2->getAttribute(UCOL_CASE_FIRST, status); 2350 if(val == UCOL_LOWER_FIRST){ 2351 c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); 2352 }else{ 2353 c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); 2354 } 2355 if(U_FAILURE(status)){ 2356 errln("set and get attributes of collator failed. 
%s\n", u_errorName(status)); 2357 return; 2358 } 2359 dump("c0", c0, status); 2360 dump("c1", c1, status); 2361 dump("c2", c2, status); 2362 if(*c1 == *c2){ 2363 errln("The cloned objects refer to same data"); 2364 } 2365 delete c0; 2366 delete c1; 2367 delete c2; 2368 } 2369 2370 void CollationAPITest::TestCloneBinary() { 2371 IcuTestErrorCode errorCode(*this, "TestCloneBinary"); 2372 LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode)); 2373 LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode)); 2374 if(errorCode.errDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) { 2375 return; 2376 } 2377 RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias()); 2378 RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias()); 2379 if(rbRoot == NULL || rbc == NULL) { 2380 infoln("root or de@collation=phonebook is not a RuleBasedCollator"); 2381 return; 2382 } 2383 rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode); 2384 UnicodeString uUmlaut((UChar)0xfc); 2385 UnicodeString ue = UNICODE_STRING_SIMPLE("ue"); 2386 assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode)); 2387 uint8_t bin[25000]; 2388 int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode); 2389 if(errorCode.errDataIfFailureAndReset("rbc->cloneBinary()")) { 2390 return; 2391 } 2392 logln("rbc->cloneBinary() -> %d bytes", (int)binLength); 2393 2394 RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode); 2395 if(errorCode.errDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) { 2396 return; 2397 } 2398 assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode)); 2399 assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode)); 2400 assertTrue("rbc==rbc2", *rbc == rbc2); 2401 uint8_t bin2[25000]; 2402 int32_t bin2Length = 
rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode); 2403 assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length); 2404 assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0); 2405 2406 RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode); 2407 if(errorCode.errDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) { 2408 return; 2409 } 2410 assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode)); 2411 assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode)); 2412 assertTrue("rbc==rbc3", *rbc == rbc3); 2413 } 2414 2415 void CollationAPITest::TestIterNumeric() { 2416 // Regression test for ticket #9915. 2417 // The collation code sometimes masked the continuation marker away 2418 // but later tested the result for isContinuation(). 2419 // This test case failed because the third bytes of the computed numeric-collation primaries 2420 // were permutated with the script reordering table. 2421 // It should have been possible to reproduce this with the root collator 2422 // and characters with appropriate 3-byte primary weights. 2423 // The effectiveness of this test depends completely on the collation elements 2424 // and on the implementation code. 2425 IcuTestErrorCode errorCode(*this, "TestIterNumeric"); 2426 RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode); 2427 if(errorCode.errDataIfFailureAndReset("RuleBasedCollator constructor")) { 2428 return; 2429 } 2430 coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode); 2431 UCharIterator iter40, iter72; 2432 uiter_setUTF8(&iter40, "\x34\x30", 2); 2433 uiter_setUTF8(&iter72, "\x37\x32", 2); 2434 UCollationResult result = coll.compare(iter40, iter72, errorCode); 2435 assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result); 2436 } 2437 2438 void CollationAPITest::TestBadKeywords() { 2439 // Test locale IDs with errors. 
2440 // Valid locale IDs are tested via data-driven tests. 2441 UErrorCode errorCode = U_ZERO_ERROR; 2442 Locale bogusLocale(Locale::getRoot()); 2443 bogusLocale.setToBogus(); 2444 LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode)); 2445 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2446 errln("Collator::createInstance(bogus locale) did not fail as expected - %s", 2447 u_errorName(errorCode)); 2448 } 2449 2450 // Unknown value. 2451 const char *localeID = "it-u-ks-xyz"; 2452 errorCode = U_ZERO_ERROR; 2453 coll.adoptInstead(Collator::createInstance(localeID, errorCode)); 2454 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2455 dataerrln("Collator::createInstance(%s) did not fail as expected - %s", 2456 localeID, u_errorName(errorCode)); 2457 } 2458 2459 // Unsupported attributes. 2460 localeID = "it@colHiraganaQuaternary=true"; 2461 errorCode = U_ZERO_ERROR; 2462 coll.adoptInstead(Collator::createInstance(localeID, errorCode)); 2463 if(errorCode != U_UNSUPPORTED_ERROR) { 2464 if (errorCode == U_FILE_ACCESS_ERROR) { 2465 dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode)); 2466 } else { 2467 errln("Collator::createInstance(%s) did not fail as expected - %s", 2468 localeID, u_errorName(errorCode)); 2469 } 2470 } 2471 2472 localeID = "it-u-vt-u24"; 2473 errorCode = U_ZERO_ERROR; 2474 coll.adoptInstead(Collator::createInstance(localeID, errorCode)); 2475 if(errorCode != U_UNSUPPORTED_ERROR) { 2476 if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) { 2477 dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode)); 2478 } else { 2479 errln("Collator::createInstance(%s) did not fail as expected - %s", 2480 localeID, u_errorName(errorCode)); 2481 } 2482 } 2483 } 2484 2485 void CollationAPITest::TestGapTooSmall() { 2486 IcuTestErrorCode errorCode(*this, "TestGapTooSmall"); 2487 // Try to tailor >20k characters into a too-small primary gap between symbols 2488 
// that have 3-byte primary weights. 2489 // In FractionalUCA.txt: 2490 // 263A; [0C BA D0, 05, 05] # Zyyy So [084A.0020.0002] * WHITE SMILING FACE 2491 // 263B; [0C BA D7, 05, 05] # Zyyy So [084B.0020.0002] * BLACK SMILING FACE 2492 { 2493 RuleBasedCollator(u"&<*\u4E00-\u9FFF", errorCode); 2494 if(errorCode.isSuccess()) { 2495 errln("no exception for primary-gap overflow"); 2496 } else if(errorCode.get() == U_BUFFER_OVERFLOW_ERROR) { 2497 // This is the expected error. 2498 // assertTrue("exception message mentions 'gap'", e.getMessage().contains("gap")); 2499 } else { 2500 errln("unexpected error for primary-gap overflow: %s", errorCode.errorName()); 2501 } 2502 errorCode.reset(); 2503 } 2504 2505 // CLDR 32/ICU 60 FractionalUCA.txt makes room at the end of the symbols range 2506 // for several 2-byte primaries, or a large number of 3-byters. 2507 // The reset point is primary-before what should be 2508 // the special currency-first-primary contraction, 2509 // which is hopefully fairly stable, but not guaranteed stable. 
2510 // In FractionalUCA.txt: 2511 // FDD1 20AC; [0D 70 02, 05, 05] # CURRENCY first primary 2512 { 2513 RuleBasedCollator coll(u"&[before 1]\uFDD1<*\u4E00-\u9FFF", errorCode); 2514 assertTrue("tailored Han before currency", coll.compare(u"\u4E00", u"$", errorCode) < 0); 2515 errorCode.errIfFailureAndReset( 2516 "unexpected exception for tailoring many characters at the end of symbols"); 2517 } 2518 } 2519 2520 void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) { 2521 const char* bigone = "One"; 2522 const char* littleone = "one"; 2523 2524 logln(msg + " " + c->compare(bigone, littleone) + 2525 " s: " + c->getStrength() + 2526 " u: " + c->getAttribute(UCOL_CASE_FIRST, status)); 2527 } 2528 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) 2529 { 2530 if (exec) logln("TestSuite CollationAPITest: "); 2531 TESTCASE_AUTO_BEGIN; 2532 TESTCASE_AUTO(TestProperty); 2533 TESTCASE_AUTO(TestKeywordValues); 2534 TESTCASE_AUTO(TestOperators); 2535 TESTCASE_AUTO(TestDuplicate); 2536 TESTCASE_AUTO(TestCompare); 2537 TESTCASE_AUTO(TestHashCode); 2538 TESTCASE_AUTO(TestCollationKey); 2539 TESTCASE_AUTO(TestElemIter); 2540 TESTCASE_AUTO(TestGetAll); 2541 TESTCASE_AUTO(TestRuleBasedColl); 2542 TESTCASE_AUTO(TestDecomposition); 2543 TESTCASE_AUTO(TestSafeClone); 2544 TESTCASE_AUTO(TestSortKey); 2545 TESTCASE_AUTO(TestSortKeyOverflow); 2546 TESTCASE_AUTO(TestMaxExpansion); 2547 TESTCASE_AUTO(TestDisplayName); 2548 TESTCASE_AUTO(TestAttribute); 2549 TESTCASE_AUTO(TestVariableTopSetting); 2550 TESTCASE_AUTO(TestMaxVariable); 2551 TESTCASE_AUTO(TestRules); 2552 TESTCASE_AUTO(TestGetLocale); 2553 TESTCASE_AUTO(TestBounds); 2554 TESTCASE_AUTO(TestGetTailoredSet); 2555 TESTCASE_AUTO(TestUClassID); 2556 TESTCASE_AUTO(TestSubclass); 2557 TESTCASE_AUTO(TestNULLCharTailoring); 2558 TESTCASE_AUTO(TestClone); 2559 TESTCASE_AUTO(TestCloneBinary); 2560 TESTCASE_AUTO(TestIterNumeric); 2561 
TESTCASE_AUTO(TestBadKeywords); 2562 TESTCASE_AUTO(TestGapTooSmall); 2563 TESTCASE_AUTO_END; 2564 } 2565 2566 #endif /* #if !UCONFIG_NO_COLLATION */ 2567