// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
//===============================================================================
//
// File apicoll.cpp
//
//
//
// Created by: Helena Shih
//
// Modification History:
//
//  Date         Name          Description
//   2/5/97      aliu        Added streamIn and streamOut methods.  Added
//                           constructor which reads RuleBasedCollator object from
//                           a binary file.  Added writeToFile method which streams
//                           RuleBasedCollator out to a binary file.  The streamIn
//                           and streamOut methods use istream and ostream objects
//                           in binary mode.
//  6/30/97      helena      Added tests for CollationElementIterator::setText, getOffset
//                           setOffset and DecompositionIterator::getOffset, setOffset.
//                           DecompositionIterator is made public so add class scope
//                           testing.
29 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t) 30 //=============================================================================== 31 32 #include "unicode/utypes.h" 33 34 #if !UCONFIG_NO_COLLATION 35 36 #include "unicode/localpointer.h" 37 #include "unicode/coll.h" 38 #include "unicode/tblcoll.h" 39 #include "unicode/coleitr.h" 40 #include "unicode/sortkey.h" 41 #include "apicoll.h" 42 #include "unicode/chariter.h" 43 #include "unicode/schriter.h" 44 #include "unicode/ustring.h" 45 #include "unicode/ucol.h" 46 47 #include "sfwdchit.h" 48 #include "cmemory.h" 49 #include <stdlib.h> 50 51 void 52 CollationAPITest::doAssert(UBool condition, const char *message) 53 { 54 if (!condition) { 55 errln(UnicodeString("ERROR : ") + message); 56 } 57 } 58 59 // Collator Class Properties 60 // ctor, dtor, createInstance, compare, getStrength/setStrength 61 // getDecomposition/setDecomposition, getDisplayName 62 void 63 CollationAPITest::TestProperty(/* char* par */) 64 { 65 UErrorCode success = U_ZERO_ERROR; 66 Collator *col = 0; 67 /* 68 * Expected version of the English collator. 69 * Currently, the major/minor version numbers change when the builder code 70 * changes, 71 * number 2 is from the tailoring data version and 72 * number 3 is the UCA version. 73 * This changes with every UCA version change, and the expected value 74 * needs to be adjusted. 75 * Same in cintltst/capitst.c. 76 */ 77 UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2 78 UVersionInfo versionArray; 79 80 logln("The property tests begin : "); 81 logln("Test ctors : "); 82 col = Collator::createInstance(Locale::getEnglish(), success); 83 if (U_FAILURE(success)){ 84 errcheckln(success, "Default Collator creation failed. 
- %s", u_errorName(success)); 85 return; 86 } 87 88 StringEnumeration* kwEnum = col->getKeywordValuesForLocale("", Locale::getEnglish(),true,success); 89 if (U_FAILURE(success)){ 90 errcheckln(success, "Get Keyword Values for Locale failed. - %s", u_errorName(success)); 91 return; 92 } 93 delete kwEnum; 94 95 col->getVersion(versionArray); 96 // Check for a version greater than some value rather than equality 97 // so that we need not update the expected version each time. 98 if (uprv_memcmp(versionArray, currVersionArray, 4)<0) { 99 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x", 100 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); 101 } else { 102 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x", 103 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); 104 } 105 106 doAssert((col->compare("ab", "abc") == Collator::LESS), "ab < abc comparison failed"); 107 doAssert((col->compare("ab", "AB") == Collator::LESS), "ab < AB comparison failed"); 108 doAssert((col->compare("blackbird", "black-bird") == Collator::GREATER), "black-bird > blackbird comparison failed"); 109 doAssert((col->compare("black bird", "black-bird") == Collator::LESS), "black bird > black-bird comparison failed"); 110 doAssert((col->compare("Hello", "hello") == Collator::GREATER), "Hello > hello comparison failed"); 111 doAssert((col->compare("","",success) == UCOL_EQUAL), "Comparison between empty strings failed"); 112 113 doAssert((col->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UTF-8 comparison failed"); 114 success = U_ZERO_ERROR; 115 { 116 UnicodeString abau=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape(); 117 UnicodeString abss=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape(); 118 UCharIterator abauIter, abssIter; 119 uiter_setReplaceable(&abauIter, &abau); 120 uiter_setReplaceable(&abssIter, &abss); 121 doAssert((col->compare(abauIter, abssIter, 
success) == UCOL_LESS), "ab a-umlaut < ab sharp-s UCharIterator comparison failed"); 122 success = U_ZERO_ERROR; 123 } 124 125 /*start of update [Bertrand A. D. 02/10/98]*/ 126 doAssert((col->compare("ab", "abc", 2) == Collator::EQUAL), "ab = abc with length 2 comparison failed"); 127 doAssert((col->compare("ab", "AB", 2) == Collator::LESS), "ab < AB with length 2 comparison failed"); 128 doAssert((col->compare("ab", "Aa", 1) == Collator::LESS), "ab < Aa with length 1 comparison failed"); 129 doAssert((col->compare("ab", "Aa", 2) == Collator::GREATER), "ab > Aa with length 2 comparison failed"); 130 doAssert((col->compare("black-bird", "blackbird", 5) == Collator::EQUAL), "black-bird = blackbird with length of 5 comparison failed"); 131 doAssert((col->compare("black bird", "black-bird", 10) == Collator::LESS), "black bird < black-bird with length 10 comparison failed"); 132 doAssert((col->compare("Hello", "hello", 5) == Collator::GREATER), "Hello > hello with length 5 comparison failed"); 133 /*end of update [Bertrand A. D. 
02/10/98]*/ 134 135 136 logln("Test ctors ends."); 137 logln("testing Collator::getStrength() method ..."); 138 doAssert((col->getStrength() == Collator::TERTIARY), "collation object has the wrong strength"); 139 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference"); 140 141 142 logln("testing Collator::setStrength() method ..."); 143 col->setStrength(Collator::SECONDARY); 144 doAssert((col->getStrength() != Collator::TERTIARY), "collation object's strength is secondary difference"); 145 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference"); 146 doAssert((col->getStrength() == Collator::SECONDARY), "collation object has the wrong strength"); 147 148 UnicodeString name; 149 150 logln("Get display name for the US English collation in German : "); 151 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name)); 152 doAssert((name == UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed"); 153 154 logln("Get display name for the US English collation in English : "); 155 logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name)); 156 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed"); 157 #if 0 158 // weiv : this test is bogus if we're running on any machine that has different default locale than English. 159 // Therefore, it is banned! 
160 logln("Get display name for the US English in default locale language : "); 161 logln(Collator::getDisplayName(Locale::US, name)); 162 doAssert((name == UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine"); 163 #endif 164 delete col; col = 0; 165 RuleBasedCollator *rcol = (RuleBasedCollator *)Collator::createInstance("da_DK", 166 success); 167 if (U_FAILURE(success)) { 168 errcheckln(success, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success)); 169 return; 170 } 171 const UnicodeString &daRules = rcol->getRules(); 172 if(daRules.isEmpty()) { 173 dataerrln("missing da_DK tailoring rule string"); 174 } else { 175 doAssert(daRules.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'"); 176 } 177 delete rcol; 178 179 col = Collator::createInstance(Locale::getFrench(), success); 180 if (U_FAILURE(success)) 181 { 182 errln("Creating French collation failed."); 183 return; 184 } 185 186 col->setStrength(Collator::PRIMARY); 187 logln("testing Collator::getStrength() method again ..."); 188 doAssert((col->getStrength() != Collator::TERTIARY), "collation object has the wrong strength"); 189 doAssert((col->getStrength() == Collator::PRIMARY), "collation object's strength is not primary difference"); 190 191 logln("testing French Collator::setStrength() method ..."); 192 col->setStrength(Collator::TERTIARY); 193 doAssert((col->getStrength() == Collator::TERTIARY), "collation object's strength is not tertiary difference"); 194 doAssert((col->getStrength() != Collator::PRIMARY), "collation object's strength is primary difference"); 195 doAssert((col->getStrength() != Collator::SECONDARY), "collation object's strength is secondary difference"); 196 delete col; 197 198 logln("Create junk collation: "); 199 Locale abcd("ab", "CD", ""); 200 success = U_ZERO_ERROR; 201 Collator *junk = 0; 202 junk = Collator::createInstance(abcd, success); 203 204 if (U_FAILURE(success)) 205 { 206 errln("Junk collation creation 
failed, should at least return default."); 207 return; 208 } 209 210 doAssert(((RuleBasedCollator *)junk)->getRules().isEmpty(), 211 "The root collation should be returned for an unsupported language."); 212 Collator *frCol = Collator::createInstance(Locale::getCanadaFrench(), success); 213 if (U_FAILURE(success)) 214 { 215 errln("Creating fr_CA collator failed."); 216 delete junk; 217 return; 218 } 219 220 // If the default locale isn't French, the French and non-French collators 221 // should be different 222 if (frCol->getLocale(ULOC_ACTUAL_LOCALE, success) != Locale::getCanadaFrench()) { 223 doAssert((*frCol != *junk), "The junk is the same as the fr_CA collator."); 224 } 225 Collator *aFrCol = frCol->clone(); 226 doAssert((*frCol == *aFrCol), "The cloning of a fr_CA collator failed."); 227 logln("Collator property test ended."); 228 229 delete frCol; 230 delete aFrCol; 231 delete junk; 232 } 233 234 void 235 CollationAPITest::TestRuleBasedColl() 236 { 237 RuleBasedCollator *col1, *col2, *col3, *col4; 238 UErrorCode status = U_ZERO_ERROR; 239 240 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E"); 241 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E"); 242 243 col1 = new RuleBasedCollator(ruleset1, status); 244 if (U_FAILURE(status)) { 245 errcheckln(status, "RuleBased Collator creation failed. 
- %s", u_errorName(status)); 246 return; 247 } 248 else { 249 logln("PASS: RuleBased Collator creation passed\n"); 250 } 251 252 status = U_ZERO_ERROR; 253 col2 = new RuleBasedCollator(ruleset2, status); 254 if (U_FAILURE(status)) { 255 errln("RuleBased Collator creation failed.\n"); 256 return; 257 } 258 else { 259 logln("PASS: RuleBased Collator creation passed\n"); 260 } 261 262 status = U_ZERO_ERROR; 263 Locale locale("aa", "AA"); 264 col3 = (RuleBasedCollator *)Collator::createInstance(locale, status); 265 if (U_FAILURE(status)) { 266 errln("Fallback Collator creation failed.: %s\n"); 267 return; 268 } 269 else { 270 logln("PASS: Fallback Collator creation passed\n"); 271 } 272 delete col3; 273 274 status = U_ZERO_ERROR; 275 col3 = (RuleBasedCollator *)Collator::createInstance(status); 276 if (U_FAILURE(status)) { 277 errln("Default Collator creation failed.: %s\n"); 278 return; 279 } 280 else { 281 logln("PASS: Default Collator creation passed\n"); 282 } 283 284 UnicodeString rule1 = col1->getRules(); 285 UnicodeString rule2 = col2->getRules(); 286 UnicodeString rule3 = col3->getRules(); 287 288 doAssert(rule1 != rule2, "Default collator getRules failed"); 289 doAssert(rule2 != rule3, "Default collator getRules failed"); 290 doAssert(rule1 != rule3, "Default collator getRules failed"); 291 292 col4 = new RuleBasedCollator(rule2, status); 293 if (U_FAILURE(status)) { 294 errln("RuleBased Collator creation failed.\n"); 295 return; 296 } 297 298 UnicodeString rule4 = col4->getRules(); 299 doAssert(rule2 == rule4, "Default collator getRules failed"); 300 int32_t length4 = 0; 301 uint8_t *clonedrule4 = col4->cloneRuleData(length4, status); 302 if (U_FAILURE(status)) { 303 errln("Cloned rule data failed.\n"); 304 return; 305 } 306 307 // free(clonedrule4); BAD API!!!! 
308 uprv_free(clonedrule4); 309 310 311 delete col1; 312 delete col2; 313 delete col3; 314 delete col4; 315 } 316 317 void 318 CollationAPITest::TestRules() 319 { 320 RuleBasedCollator *coll; 321 UErrorCode status = U_ZERO_ERROR; 322 UnicodeString rules; 323 324 coll = (RuleBasedCollator *)Collator::createInstance(Locale::getEnglish(), status); 325 if (U_FAILURE(status)) { 326 errcheckln(status, "English Collator creation failed. - %s", u_errorName(status)); 327 return; 328 } 329 else { 330 logln("PASS: RuleBased Collator creation passed\n"); 331 } 332 333 coll->getRules(UCOL_TAILORING_ONLY, rules); 334 if (rules.length() != 0x00) { 335 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules.length(), 0x00); 336 } 337 338 coll->getRules(UCOL_FULL_RULES, rules); 339 if (rules.length() < 0) { 340 errln("English full rules failed"); 341 } 342 delete coll; 343 } 344 345 void 346 CollationAPITest::TestDecomposition() { 347 UErrorCode status = U_ZERO_ERROR; 348 Collator *en_US = Collator::createInstance("en_US", status), 349 *el_GR = Collator::createInstance("el_GR", status), 350 *vi_VN = Collator::createInstance("vi_VN", status); 351 352 if (U_FAILURE(status)) { 353 errcheckln(status, "ERROR: collation creation failed. 
- %s", u_errorName(status)); 354 return; 355 } 356 357 /* there is no reason to have canonical decomposition in en_US OR default locale */ 358 if (vi_VN->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON) 359 { 360 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n"); 361 } 362 363 if (el_GR->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_ON) 364 { 365 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n"); 366 } 367 368 if (en_US->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) 369 { 370 errln("ERROR: en_US collation had canonical decomposition for normalization!\n"); 371 } 372 373 delete en_US; 374 delete el_GR; 375 delete vi_VN; 376 } 377 378 void 379 CollationAPITest::TestSafeClone() { 380 static const int CLONETEST_COLLATOR_COUNT = 3; 381 Collator *someCollators [CLONETEST_COLLATOR_COUNT]; 382 Collator *col; 383 UErrorCode err = U_ZERO_ERROR; 384 int index; 385 386 UnicodeString test1("abCda"); 387 UnicodeString test2("abcda"); 388 389 /* one default collator & two complex ones */ 390 someCollators[0] = Collator::createInstance("en_US", err); 391 someCollators[1] = Collator::createInstance("ko", err); 392 someCollators[2] = Collator::createInstance("ja_JP", err); 393 if(U_FAILURE(err)) { 394 errcheckln(err, "Couldn't instantiate collators. 
Error: %s", u_errorName(err)); 395 delete someCollators[0]; 396 delete someCollators[1]; 397 delete someCollators[2]; 398 return; 399 } 400 401 /* change orig & clone & make sure they are independent */ 402 403 for (index = 0; index < CLONETEST_COLLATOR_COUNT; index++) 404 { 405 col = someCollators[index]->safeClone(); 406 if (col == 0) { 407 errln("SafeClone of collator should not return null\n"); 408 break; 409 } 410 col->setStrength(Collator::TERTIARY); 411 someCollators[index]->setStrength(Collator::PRIMARY); 412 col->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err); 413 someCollators[index]->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, err); 414 415 doAssert(col->greater(test1, test2), "Result should be \"abCda\" >>> \"abcda\" "); 416 doAssert(someCollators[index]->equals(test1, test2), "Result should be \"abcda\" == \"abCda\""); 417 delete col; 418 delete someCollators[index]; 419 } 420 } 421 422 void 423 CollationAPITest::TestHashCode(/* char* par */) 424 { 425 logln("hashCode tests begin."); 426 UErrorCode success = U_ZERO_ERROR; 427 Collator *col1 = 0; 428 col1 = Collator::createInstance(Locale::getEnglish(), success); 429 if (U_FAILURE(success)) 430 { 431 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 432 return; 433 } 434 435 Collator *col2 = 0; 436 Locale dk("da", "DK", ""); 437 col2 = Collator::createInstance(dk, success); 438 if (U_FAILURE(success)) 439 { 440 errln("Danish collation creation failed."); 441 return; 442 } 443 444 Collator *col3 = 0; 445 col3 = Collator::createInstance(Locale::getEnglish(), success); 446 if (U_FAILURE(success)) 447 { 448 errln("2nd default collation creation failed."); 449 return; 450 } 451 452 logln("Collator::hashCode() testing ..."); 453 454 doAssert(col1->hashCode() != col2->hashCode(), "Hash test1 result incorrect" ); 455 doAssert(!(col1->hashCode() == col2->hashCode()), "Hash test2 result incorrect" ); 456 doAssert(col1->hashCode() == col3->hashCode(), "Hash result not equal" ); 457 458 logln("hashCode tests end."); 459 delete col1; 460 delete col2; 461 462 UnicodeString test1("Abcda"); 463 UnicodeString test2("abcda"); 464 465 CollationKey sortk1, sortk2, sortk3; 466 UErrorCode status = U_ZERO_ERROR; 467 468 col3->getCollationKey(test1, sortk1, status); 469 col3->getCollationKey(test2, sortk2, status); 470 col3->getCollationKey(test2, sortk3, status); 471 472 doAssert(sortk1.hashCode() != sortk2.hashCode(), "Hash test1 result incorrect"); 473 doAssert(sortk2.hashCode() == sortk3.hashCode(), "Hash result not equal" ); 474 475 delete col3; 476 } 477 478 //---------------------------------------------------------------------------- 479 // CollationKey -- Tests the CollationKey methods 480 // 481 void 482 CollationAPITest::TestCollationKey(/* char* par */) 483 { 484 logln("testing CollationKey begins..."); 485 Collator *col = 0; 486 UErrorCode success=U_ZERO_ERROR; 487 col = Collator::createInstance(Locale::getEnglish(), success); 488 if (U_FAILURE(success)) 489 { 490 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 491 return; 492 } 493 col->setStrength(Collator::TERTIARY); 494 495 CollationKey sortk1, sortk2; 496 UnicodeString test1("Abcda"), test2("abcda"); 497 UErrorCode key1Status = U_ZERO_ERROR, key2Status = U_ZERO_ERROR; 498 499 logln("Testing weird arguments"); 500 // No string vs. empty string vs. completely-ignorable string: 501 // See ICU ticket #10495. 502 CollationKey sortkNone; 503 int32_t length; 504 sortkNone.getByteArray(length); 505 doAssert(!sortkNone.isBogus() && length == 0, 506 "Default-constructed collation key should be empty"); 507 CollationKey sortkEmpty; 508 col->getCollationKey(NULL, 0, sortkEmpty, key1Status); 509 // key gets reset here 510 const uint8_t* byteArrayEmpty = sortkEmpty.getByteArray(length); 511 doAssert(sortkEmpty.isBogus() == FALSE && length == 3 && 512 byteArrayEmpty[0] == 1 && byteArrayEmpty[1] == 1 && byteArrayEmpty[2] == 0, 513 "Empty string should return a collation key with empty levels"); 514 doAssert(sortkNone.compareTo(sortkEmpty) == Collator::LESS, 515 "Expected no collation key < collation key for empty string"); 516 doAssert(sortkEmpty.compareTo(sortkNone) == Collator::GREATER, 517 "Expected collation key for empty string > no collation key"); 518 519 CollationKey sortkIgnorable; 520 // Most control codes and CGJ are completely ignorable. 521 // A string with only completely ignorables must compare equal to an empty string. 
522 col->getCollationKey(UnicodeString((UChar)1).append((UChar)0x34f), sortkIgnorable, key1Status); 523 sortkIgnorable.getByteArray(length); 524 doAssert(!sortkIgnorable.isBogus() && length == 3, 525 "Completely ignorable string should return a collation key with empty levels"); 526 doAssert(sortkIgnorable.compareTo(sortkEmpty) == Collator::EQUAL, 527 "Completely ignorable string should compare equal to empty string"); 528 529 // bogus key returned here 530 key1Status = U_ILLEGAL_ARGUMENT_ERROR; 531 col->getCollationKey(NULL, 0, sortk1, key1Status); 532 doAssert(sortk1.isBogus() && (sortk1.getByteArray(length), length) == 0, 533 "Error code should return bogus collation key"); 534 535 key1Status = U_ZERO_ERROR; 536 logln("Use tertiary comparison level testing ...."); 537 538 col->getCollationKey(test1, sortk1, key1Status); 539 if (U_FAILURE(key1Status)) { 540 errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status)); 541 return; 542 } 543 doAssert((sortk1.compareTo(col->getCollationKey(test2, sortk2, key2Status))) 544 == Collator::GREATER, 545 "Result should be \"Abcda\" >>> \"abcda\""); 546 547 CollationKey sortk3(sortk2), sortkNew; 548 549 sortkNew = sortk1; 550 doAssert((sortk1 != sortk2), "The sort keys should be different"); 551 doAssert((sortk1.hashCode() != sortk2.hashCode()), "sort key hashCode() failed"); 552 doAssert((sortk2 == sortk3), "The sort keys should be the same"); 553 doAssert((sortk1 == sortkNew), "The sort keys assignment failed"); 554 doAssert((sortk1.hashCode() == sortkNew.hashCode()), "sort key hashCode() failed"); 555 doAssert((sortkNew != sortk3), "The sort keys should be different"); 556 doAssert(sortk1.compareTo(sortk3) == Collator::GREATER, "Result should be \"Abcda\" >>> \"abcda\""); 557 doAssert(sortk2.compareTo(sortk3) == Collator::EQUAL, "Result should be \"abcda\" == \"abcda\""); 558 doAssert(sortkEmpty.compareTo(sortk1) == Collator::LESS, "Result should be (empty key) <<< \"Abcda\""); 559 
doAssert(sortk1.compareTo(sortkEmpty) == Collator::GREATER, "Result should be \"Abcda\" >>> (empty key)"); 560 doAssert(sortkEmpty.compareTo(sortkEmpty) == Collator::EQUAL, "Result should be (empty key) == (empty key)"); 561 doAssert(sortk1.compareTo(sortk3, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> \"abcda\""); 562 doAssert(sortk2.compareTo(sortk3, success) == UCOL_EQUAL, "Result should be \"abcda\" == \"abcda\""); 563 doAssert(sortkEmpty.compareTo(sortk1, success) == UCOL_LESS, "Result should be (empty key) <<< \"Abcda\""); 564 doAssert(sortk1.compareTo(sortkEmpty, success) == UCOL_GREATER, "Result should be \"Abcda\" >>> (empty key)"); 565 doAssert(sortkEmpty.compareTo(sortkEmpty, success) == UCOL_EQUAL, "Result should be (empty key) == (empty key)"); 566 567 int32_t cnt1, cnt2, cnt3, cnt4; 568 569 const uint8_t* byteArray1 = sortk1.getByteArray(cnt1); 570 const uint8_t* byteArray2 = sortk2.getByteArray(cnt2); 571 572 const uint8_t* byteArray3 = 0; 573 byteArray3 = sortk1.getByteArray(cnt3); 574 575 const uint8_t* byteArray4 = 0; 576 byteArray4 = sortk2.getByteArray(cnt4); 577 578 CollationKey sortk4(byteArray1, cnt1), sortk5(byteArray2, cnt2); 579 CollationKey sortk6(byteArray3, cnt3), sortk7(byteArray4, cnt4); 580 581 doAssert(sortk1.compareTo(sortk4) == Collator::EQUAL, "CollationKey::toByteArray(sortk1) Failed."); 582 doAssert(sortk2.compareTo(sortk5) == Collator::EQUAL, "CollationKey::toByteArray(sortk2) Failed."); 583 doAssert(sortk4.compareTo(sortk5) == Collator::GREATER, "sortk4 >>> sortk5 Failed"); 584 doAssert(sortk1.compareTo(sortk6) == Collator::EQUAL, "CollationKey::getByteArray(sortk1) Failed."); 585 doAssert(sortk2.compareTo(sortk7) == Collator::EQUAL, "CollationKey::getByteArray(sortk2) Failed."); 586 doAssert(sortk6.compareTo(sortk7) == Collator::GREATER, "sortk6 >>> sortk7 Failed"); 587 588 logln("Equality tests : "); 589 doAssert(sortk1 == sortk4, "sortk1 == sortk4 Failed."); 590 doAssert(sortk2 == sortk5, "sortk2 == sortk5 
Failed."); 591 doAssert(sortk1 != sortk5, "sortk1 != sortk5 Failed."); 592 doAssert(sortk1 == sortk6, "sortk1 == sortk6 Failed."); 593 doAssert(sortk2 == sortk7, "sortk2 == sortk7 Failed."); 594 doAssert(sortk1 != sortk7, "sortk1 != sortk7 Failed."); 595 596 byteArray1 = 0; 597 byteArray2 = 0; 598 599 sortk3 = sortk1; 600 doAssert(sortk1 == sortk3, "sortk1 = sortk3 assignment Failed."); 601 doAssert(sortk2 != sortk3, "sortk2 != sortk3 Failed."); 602 logln("testing sortkey ends..."); 603 604 col->setStrength(Collator::SECONDARY); 605 doAssert(col->getCollationKey(test1, sortk1, key1Status).compareTo( 606 col->getCollationKey(test2, sortk2, key2Status)) 607 == Collator::EQUAL, 608 "Result should be \"Abcda\" == \"abcda\""); 609 delete col; 610 } 611 612 //---------------------------------------------------------------------------- 613 // Tests the CollatorElementIterator class. 614 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!= 615 // 616 void 617 CollationAPITest::TestElemIter(/* char* par */) 618 { 619 logln("testing sortkey begins..."); 620 Collator *col = 0; 621 UErrorCode success = U_ZERO_ERROR; 622 col = Collator::createInstance(Locale::getEnglish(), success); 623 if (U_FAILURE(success)) 624 { 625 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 626 return; 627 } 628 629 UnicodeString testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"); 630 UnicodeString testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"); 631 logln("Constructors and comparison testing...."); 632 CollationElementIterator *iterator1 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1); 633 634 CharacterIterator *chariter=new StringCharacterIterator(testString1); 635 CollationElementIterator *coliter=((RuleBasedCollator*)col)->createCollationElementIterator(*chariter); 636 637 // copy ctor 638 CollationElementIterator *iterator2 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString1); 639 CollationElementIterator *iterator3 = ((RuleBasedCollator*)col)->createCollationElementIterator(testString2); 640 641 int32_t offset = iterator1->getOffset(); 642 if (offset != 0) { 643 errln("Error in getOffset for collation element iterator\n"); 644 return; 645 } 646 iterator1->setOffset(6, success); 647 if (U_FAILURE(success)) { 648 errln("Error in setOffset for collation element iterator\n"); 649 return; 650 } 651 iterator1->setOffset(0, success); 652 int32_t order1, order2, order3; 653 doAssert((*iterator1 == *iterator2), "The two iterators should be the same"); 654 doAssert((*iterator1 != *iterator3), "The two iterators should be different"); 655 656 doAssert((*coliter == *iterator1), "The two iterators should be the same"); 657 doAssert((*coliter == *iterator2), "The two iterators should be the same"); 658 doAssert((*coliter != *iterator3), "The two iterators should be different"); 659 660 order1 = iterator1->next(success); 661 if (U_FAILURE(success)) 662 { 663 errln("Somehow ran out of memory stepping through the iterator."); 664 return; 665 } 666 667 doAssert((*iterator1 != *iterator2), "The first iterator advance failed"); 668 order2 = 
iterator2->getOffset(); 669 doAssert((order1 != order2), "The order result should not be the same"); 670 order2 = iterator2->next(success); 671 if (U_FAILURE(success)) 672 { 673 errln("Somehow ran out of memory stepping through the iterator."); 674 return; 675 } 676 677 doAssert((*iterator1 == *iterator2), "The second iterator advance failed"); 678 doAssert((order1 == order2), "The order result should be the same"); 679 order3 = iterator3->next(success); 680 if (U_FAILURE(success)) 681 { 682 errln("Somehow ran out of memory stepping through the iterator."); 683 return; 684 } 685 686 doAssert((CollationElementIterator::primaryOrder(order1) == 687 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same"); 688 doAssert((CollationElementIterator::secondaryOrder(order1) == 689 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same"); 690 doAssert((CollationElementIterator::tertiaryOrder(order1) == 691 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same"); 692 693 order1 = iterator1->next(success); order3 = iterator3->next(success); 694 if (U_FAILURE(success)) 695 { 696 errln("Somehow ran out of memory stepping through the iterator."); 697 return; 698 } 699 700 doAssert((CollationElementIterator::primaryOrder(order1) == 701 CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical"); 702 doAssert((CollationElementIterator::tertiaryOrder(order1) != 703 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different"); 704 705 order1 = iterator1->next(success); 706 order3 = iterator3->next(success); 707 /* NO! 
Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */ 708 /* 709 doAssert((CollationElementIterator::secondaryOrder(order1) != 710 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same"); 711 */ 712 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached"); 713 714 iterator1->reset(); iterator2->reset(); iterator3->reset(); 715 order1 = iterator1->next(success); 716 if (U_FAILURE(success)) 717 { 718 errln("Somehow ran out of memory stepping through the iterator."); 719 return; 720 } 721 722 doAssert((*iterator1 != *iterator2), "The first iterator advance failed"); 723 724 order2 = iterator2->next(success); 725 if (U_FAILURE(success)) 726 { 727 errln("Somehow ran out of memory stepping through the iterator."); 728 return; 729 } 730 731 doAssert((*iterator1 == *iterator2), "The second iterator advance failed"); 732 doAssert((order1 == order2), "The order result should be the same"); 733 734 order3 = iterator3->next(success); 735 if (U_FAILURE(success)) 736 { 737 errln("Somehow ran out of memory stepping through the iterator."); 738 return; 739 } 740 741 doAssert((CollationElementIterator::primaryOrder(order1) == 742 CollationElementIterator::primaryOrder(order3)), "The primary orders should be the same"); 743 doAssert((CollationElementIterator::secondaryOrder(order1) == 744 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should be the same"); 745 doAssert((CollationElementIterator::tertiaryOrder(order1) == 746 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be the same"); 747 748 order1 = iterator1->next(success); order2 = iterator2->next(success); order3 = iterator3->next(success); 749 if (U_FAILURE(success)) 750 { 751 errln("Somehow ran out of memory stepping through the iterator."); 752 return; 753 } 754 755 doAssert((CollationElementIterator::primaryOrder(order1) == 756 
CollationElementIterator::primaryOrder(order3)), "The primary orders should be identical"); 757 doAssert((CollationElementIterator::tertiaryOrder(order1) != 758 CollationElementIterator::tertiaryOrder(order3)), "The tertiary orders should be different"); 759 760 order1 = iterator1->next(success); order3 = iterator3->next(success); 761 if (U_FAILURE(success)) 762 { 763 errln("Somehow ran out of memory stepping through the iterator."); 764 return; 765 } 766 767 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */ 768 /* 769 doAssert((CollationElementIterator::secondaryOrder(order1) != 770 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same"); 771 */ 772 doAssert((order1 != CollationElementIterator::NULLORDER), "Unexpected end of iterator reached"); 773 doAssert((*iterator2 != *iterator3), "The iterators should be different"); 774 775 776 //test error values 777 success=U_UNSUPPORTED_ERROR; 778 Collator *colerror=NULL; 779 colerror=Collator::createInstance(Locale::getEnglish(), success); 780 if (colerror != 0 || success == U_ZERO_ERROR){ 781 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n"); 782 } 783 int32_t position=coliter->previous(success); 784 if(position != CollationElementIterator::NULLORDER){ 785 errln((UnicodeString)"Expected NULLORDER got" + position); 786 } 787 coliter->reset(); 788 coliter->setText(*chariter, success); 789 if(!U_FAILURE(success)){ 790 errln("Expeceted error"); 791 } 792 iterator1->setText((UnicodeString)"hello there", success); 793 if(!U_FAILURE(success)){ 794 errln("Expeceted error"); 795 } 796 797 delete chariter; 798 delete coliter; 799 delete iterator1; 800 delete iterator2; 801 delete iterator3; 802 delete col; 803 804 805 806 logln("testing CollationElementIterator ends..."); 807 } 808 809 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules 810 void 811 
CollationAPITest::TestOperators(/* char* par */) 812 { 813 UErrorCode success = U_ZERO_ERROR; 814 UnicodeString ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E"); 815 UnicodeString ruleset2("&9 < a, A < b, B < c, C < d, D, e, E"); 816 RuleBasedCollator *col1 = new RuleBasedCollator(ruleset1, success); 817 if (U_FAILURE(success)) { 818 errcheckln(success, "RuleBasedCollator creation failed. - %s", u_errorName(success)); 819 return; 820 } 821 success = U_ZERO_ERROR; 822 RuleBasedCollator *col2 = new RuleBasedCollator(ruleset2, success); 823 if (U_FAILURE(success)) { 824 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set."); 825 return; 826 } 827 logln("The operator tests begin : "); 828 logln("testing operator==, operator!=, clone methods ..."); 829 doAssert((*col1 != *col2), "The two different table collations compared equal"); 830 *col1 = *col2; 831 doAssert((*col1 == *col2), "Collator objects not equal after assignment (operator=)"); 832 833 success = U_ZERO_ERROR; 834 Collator *col3 = Collator::createInstance(Locale::getEnglish(), success); 835 if (U_FAILURE(success)) { 836 errln("Default collation creation failed."); 837 return; 838 } 839 doAssert((*col1 != *col3), "The two different table collations compared equal"); 840 Collator* col4 = col1->clone(); 841 Collator* col5 = col3->clone(); 842 doAssert((*col1 == *col4), "Cloned collation objects not equal"); 843 doAssert((*col3 != *col4), "Two different table collations compared equal"); 844 doAssert((*col3 == *col5), "Cloned collation objects not equal"); 845 doAssert((*col4 != *col5), "Two cloned collations compared equal"); 846 847 const UnicodeString& defRules = ((RuleBasedCollator*)col3)->getRules(); 848 RuleBasedCollator* col6 = new RuleBasedCollator(defRules, success); 849 if (U_FAILURE(success)) { 850 errln("Creating default collation with rules failed."); 851 return; 852 } 853 doAssert((((RuleBasedCollator*)col3)->getRules() == col6->getRules()), "Default 
collator getRules failed"); 854 855 success = U_ZERO_ERROR; 856 RuleBasedCollator *col7 = new RuleBasedCollator(ruleset2, Collator::TERTIARY, success); 857 if (U_FAILURE(success)) { 858 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength."); 859 return; 860 } 861 success = U_ZERO_ERROR; 862 RuleBasedCollator *col8 = new RuleBasedCollator(ruleset2, UCOL_OFF, success); 863 if (U_FAILURE(success)) { 864 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP."); 865 return; 866 } 867 success = U_ZERO_ERROR; 868 RuleBasedCollator *col9 = new RuleBasedCollator(ruleset2, Collator::PRIMARY, UCOL_ON, success); 869 if (U_FAILURE(success)) { 870 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP."); 871 return; 872 } 873 // doAssert((*col7 == *col8), "The two equal table collations compared different"); 874 doAssert((*col7 != *col9), "The two different table collations compared equal"); 875 doAssert((*col8 != *col9), "The two different table collations compared equal"); 876 877 logln("operator tests ended."); 878 delete col1; 879 delete col2; 880 delete col3; 881 delete col4; 882 delete col5; 883 delete col6; 884 delete col7; 885 delete col8; 886 delete col9; 887 } 888 889 // test clone and copy 890 void 891 CollationAPITest::TestDuplicate(/* char* par */) 892 { 893 UErrorCode status = U_ZERO_ERROR; 894 Collator *col1 = Collator::createInstance(Locale::getEnglish(), status); 895 if (U_FAILURE(status)) { 896 logln("Default collator creation failed."); 897 return; 898 } 899 Collator *col2 = col1->clone(); 900 doAssert((*col1 == *col2), "Cloned object is not equal to the orginal"); 901 UnicodeString ruleset("&9 < a, A < b, B < c, C < d, D, e, E"); 902 RuleBasedCollator *col3 = new RuleBasedCollator(ruleset, status); 903 if (U_FAILURE(status)) { 904 logln("Collation tailoring failed."); 905 return; 
906 } 907 doAssert((*col1 != *col3), "Cloned object is equal to some dummy"); 908 *col3 = *((RuleBasedCollator*)col1); 909 doAssert((*col1 == *col3), "Copied object is not equal to the orginal"); 910 911 UCollationResult res; 912 UnicodeString first((UChar)0x0061); 913 UnicodeString second((UChar)0x0062); 914 UnicodeString copiedEnglishRules(((RuleBasedCollator*)col1)->getRules()); 915 916 delete col1; 917 918 // Try using the cloned collators after deleting the original data 919 res = col2->compare(first, second, status); 920 if(res != UCOL_LESS) { 921 errln("a should be less then b after tailoring"); 922 } 923 if (((RuleBasedCollator*)col2)->getRules() != copiedEnglishRules) { 924 errln(UnicodeString("English rule difference. ") 925 + copiedEnglishRules + UnicodeString("\ngetRules=") + ((RuleBasedCollator*)col2)->getRules()); 926 } 927 res = col3->compare(first, second, status); 928 if(res != UCOL_LESS) { 929 errln("a should be less then b after tailoring"); 930 } 931 if (col3->getRules() != copiedEnglishRules) { 932 errln(UnicodeString("English rule difference. ") 933 + copiedEnglishRules + UnicodeString("\ngetRules=") + col3->getRules()); 934 } 935 936 delete col2; 937 delete col3; 938 } 939 940 void 941 CollationAPITest::TestCompare(/* char* par */) 942 { 943 logln("The compare tests begin : "); 944 Collator *col = 0; 945 UErrorCode success = U_ZERO_ERROR; 946 col = Collator::createInstance(Locale::getEnglish(), success); 947 if (U_FAILURE(success)) { 948 errcheckln(success, "Default collation creation failed. 
- %s", u_errorName(success)); 949 return; 950 } 951 UnicodeString test1("Abcda"), test2("abcda"); 952 logln("Use tertiary comparison level testing ...."); 953 954 doAssert((!col->equals(test1, test2) ), "Result should be \"Abcda\" != \"abcda\""); 955 doAssert((col->greater(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\""); 956 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" >>> \"abcda\""); 957 958 col->setStrength(Collator::SECONDARY); 959 logln("Use secondary comparison level testing ...."); 960 961 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 962 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 963 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 964 965 col->setStrength(Collator::PRIMARY); 966 logln("Use primary comparison level testing ...."); 967 968 doAssert((col->equals(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 969 doAssert((!col->greater(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 970 doAssert((col->greaterOrEqual(test1, test2) ), "Result should be \"Abcda\" == \"abcda\""); 971 972 // Test different APIs 973 const UChar* t1 = test1.getBuffer(); 974 int32_t t1Len = test1.length(); 975 const UChar* t2 = test2.getBuffer(); 976 int32_t t2Len = test2.length(); 977 978 doAssert((col->compare(test1, test2) == Collator::EQUAL), "Problem"); 979 doAssert((col->compare(test1, test2, success) == UCOL_EQUAL), "Problem"); 980 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::EQUAL), "Problem"); 981 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_EQUAL), "Problem"); 982 doAssert((col->compare(test1, test2, t1Len) == Collator::EQUAL), "Problem"); 983 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_EQUAL), "Problem"); 984 985 col->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, success); 986 doAssert((col->compare(test1, test2) == Collator::GREATER), 
"Problem"); 987 doAssert((col->compare(test1, test2, success) == UCOL_GREATER), "Problem"); 988 doAssert((col->compare(t1, t1Len, t2, t2Len) == Collator::GREATER), "Problem"); 989 doAssert((col->compare(t1, t1Len, t2, t2Len, success) == UCOL_GREATER), "Problem"); 990 doAssert((col->compare(test1, test2, t1Len) == Collator::GREATER), "Problem"); 991 doAssert((col->compare(test1, test2, t1Len, success) == UCOL_GREATER), "Problem"); 992 993 994 995 logln("The compare tests end."); 996 delete col; 997 } 998 999 void 1000 CollationAPITest::TestGetAll(/* char* par */) 1001 { 1002 int32_t count1, count2; 1003 UErrorCode status = U_ZERO_ERROR; 1004 1005 logln("Trying Collator::getAvailableLocales(int&)"); 1006 1007 const Locale* list = Collator::getAvailableLocales(count1); 1008 for (int32_t i = 0; i < count1; ++i) { 1009 UnicodeString dispName; 1010 logln(UnicodeString("Locale name: ") 1011 + UnicodeString(list[i].getName()) 1012 + UnicodeString(" , the display name is : ") 1013 + UnicodeString(list[i].getDisplayName(dispName))); 1014 } 1015 1016 if (count1 == 0 || list == NULL) { 1017 dataerrln("getAvailableLocales(int&) returned an empty list"); 1018 } 1019 1020 logln("Trying Collator::getAvailableLocales()"); 1021 StringEnumeration* localeEnum = Collator::getAvailableLocales(); 1022 const UnicodeString* locStr; 1023 const char *locCStr; 1024 count2 = 0; 1025 1026 if (localeEnum == NULL) { 1027 dataerrln("getAvailableLocales() returned NULL"); 1028 return; 1029 } 1030 1031 while ((locStr = localeEnum->snext(status)) != NULL) 1032 { 1033 logln(UnicodeString("Locale name is: ") + *locStr); 1034 count2++; 1035 } 1036 if (count1 != count2) { 1037 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1, count2); 1038 } 1039 1040 logln("Trying Collator::getAvailableLocales() clone"); 1041 count1 = 0; 1042 StringEnumeration* localeEnum2 = localeEnum->clone(); 1043 localeEnum2->reset(status); 1044 while ((locCStr = localeEnum2->next(NULL, 
status)) != NULL) 1045 { 1046 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr)); 1047 count1++; 1048 } 1049 if (count1 != count2) { 1050 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1, count2); 1051 } 1052 if (localeEnum->count(status) != count1) { 1053 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum->count(status), count1); 1054 } 1055 delete localeEnum; 1056 delete localeEnum2; 1057 } 1058 1059 void CollationAPITest::TestSortKey() 1060 { 1061 UErrorCode status = U_ZERO_ERROR; 1062 /* 1063 this is supposed to open default date format, but later on it treats 1064 it like it is "en_US" 1065 - very bad if you try to run the tests on machine where default 1066 locale is NOT "en_US" 1067 */ 1068 Collator *col = Collator::createInstance(Locale::getEnglish(), status); 1069 if (U_FAILURE(status)) { 1070 errcheckln(status, "ERROR: Default collation creation failed.: %s\n", u_errorName(status)); 1071 return; 1072 } 1073 1074 if (col->getStrength() != Collator::TERTIARY) 1075 { 1076 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n"); 1077 } 1078 1079 /* Need to use identical strength */ 1080 col->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, status); 1081 1082 UChar test1[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0}, 1083 test2[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0}, 1084 test3[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0}; 1085 1086 uint8_t sortkey1[64]; 1087 uint8_t sortkey2[64]; 1088 uint8_t sortkey3[64]; 1089 1090 logln("Use tertiary comparison level testing ....\n"); 1091 1092 CollationKey key1; 1093 col->getCollationKey(test1, u_strlen(test1), key1, status); 1094 1095 CollationKey key2; 1096 col->getCollationKey(test2, u_strlen(test2), key2, status); 1097 1098 CollationKey key3; 1099 col->getCollationKey(test3, u_strlen(test3), key3, status); 1100 1101 doAssert(key1.compareTo(key2) == Collator::GREATER, 1102 "Result should be \"Abcda\" > 
\"abcda\""); 1103 doAssert(key2.compareTo(key1) == Collator::LESS, 1104 "Result should be \"abcda\" < \"Abcda\""); 1105 doAssert(key2.compareTo(key3) == Collator::EQUAL, 1106 "Result should be \"abcda\" == \"abcda\""); 1107 1108 // Clone the key2 sortkey for later. 1109 int32_t keylength = 0; 1110 const uint8_t *key2primary_alias = key2.getByteArray(keylength); 1111 LocalArray<uint8_t> key2primary(new uint8_t[keylength]); 1112 memcpy(key2primary.getAlias(), key2primary_alias, keylength); 1113 1114 col->getSortKey(test1, sortkey1, 64); 1115 col->getSortKey(test2, sortkey2, 64); 1116 col->getSortKey(test3, sortkey3, 64); 1117 1118 const uint8_t *tempkey = key1.getByteArray(keylength); 1119 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1120 "Test1 string should have the same collation key and sort key"); 1121 tempkey = key2.getByteArray(keylength); 1122 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1123 "Test2 string should have the same collation key and sort key"); 1124 tempkey = key3.getByteArray(keylength); 1125 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1126 "Test3 string should have the same collation key and sort key"); 1127 1128 col->getSortKey(test1, 5, sortkey1, 64); 1129 col->getSortKey(test2, 5, sortkey2, 64); 1130 col->getSortKey(test3, 5, sortkey3, 64); 1131 1132 tempkey = key1.getByteArray(keylength); 1133 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1134 "Test1 string should have the same collation key and sort key"); 1135 tempkey = key2.getByteArray(keylength); 1136 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1137 "Test2 string should have the same collation key and sort key"); 1138 tempkey = key3.getByteArray(keylength); 1139 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1140 "Test3 string should have the same collation key and sort key"); 1141 1142 UnicodeString strtest1(test1); 1143 col->getSortKey(strtest1, sortkey1, 64); 1144 UnicodeString strtest2(test2); 1145 col->getSortKey(strtest2, sortkey2, 64); 
1146 UnicodeString strtest3(test3); 1147 col->getSortKey(strtest3, sortkey3, 64); 1148 1149 tempkey = key1.getByteArray(keylength); 1150 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1151 "Test1 string should have the same collation key and sort key"); 1152 tempkey = key2.getByteArray(keylength); 1153 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1154 "Test2 string should have the same collation key and sort key"); 1155 tempkey = key3.getByteArray(keylength); 1156 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1157 "Test3 string should have the same collation key and sort key"); 1158 1159 logln("Use secondary comparision level testing ...\n"); 1160 col->setStrength(Collator::SECONDARY); 1161 1162 col->getCollationKey(test1, u_strlen(test1), key1, status); 1163 col->getCollationKey(test2, u_strlen(test2), key2, status); 1164 col->getCollationKey(test3, u_strlen(test3), key3, status); 1165 1166 doAssert(key1.compareTo(key2) == Collator::EQUAL, 1167 "Result should be \"Abcda\" == \"abcda\""); 1168 doAssert(key2.compareTo(key3) == Collator::EQUAL, 1169 "Result should be \"abcda\" == \"abcda\""); 1170 1171 tempkey = key2.getByteArray(keylength); 1172 doAssert(memcmp(tempkey, key2primary.getAlias(), keylength - 1) == 0, 1173 "Binary format for 'abcda' sortkey different for secondary strength!"); 1174 1175 col->getSortKey(test1, sortkey1, 64); 1176 col->getSortKey(test2, sortkey2, 64); 1177 col->getSortKey(test3, sortkey3, 64); 1178 1179 tempkey = key1.getByteArray(keylength); 1180 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1181 "Test1 string should have the same collation key and sort key"); 1182 tempkey = key2.getByteArray(keylength); 1183 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1184 "Test2 string should have the same collation key and sort key"); 1185 tempkey = key3.getByteArray(keylength); 1186 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1187 "Test3 string should have the same collation key and sort key"); 1188 1189 
col->getSortKey(test1, 5, sortkey1, 64); 1190 col->getSortKey(test2, 5, sortkey2, 64); 1191 col->getSortKey(test3, 5, sortkey3, 64); 1192 1193 tempkey = key1.getByteArray(keylength); 1194 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1195 "Test1 string should have the same collation key and sort key"); 1196 tempkey = key2.getByteArray(keylength); 1197 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1198 "Test2 string should have the same collation key and sort key"); 1199 tempkey = key3.getByteArray(keylength); 1200 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1201 "Test3 string should have the same collation key and sort key"); 1202 1203 col->getSortKey(strtest1, sortkey1, 64); 1204 col->getSortKey(strtest2, sortkey2, 64); 1205 col->getSortKey(strtest3, sortkey3, 64); 1206 1207 tempkey = key1.getByteArray(keylength); 1208 doAssert(memcmp(tempkey, sortkey1, keylength) == 0, 1209 "Test1 string should have the same collation key and sort key"); 1210 tempkey = key2.getByteArray(keylength); 1211 doAssert(memcmp(tempkey, sortkey2, keylength) == 0, 1212 "Test2 string should have the same collation key and sort key"); 1213 tempkey = key3.getByteArray(keylength); 1214 doAssert(memcmp(tempkey, sortkey3, keylength) == 0, 1215 "Test3 string should have the same collation key and sort key"); 1216 1217 logln("testing sortkey ends..."); 1218 delete col; 1219 } 1220 1221 void CollationAPITest::TestSortKeyOverflow() { 1222 IcuTestErrorCode errorCode(*this, "TestSortKeyOverflow()"); 1223 LocalPointer<Collator> col(Collator::createInstance(Locale::getEnglish(), errorCode)); 1224 if (errorCode.logDataIfFailureAndReset("Collator::createInstance(English) failed")) { 1225 return; 1226 } 1227 col->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode); 1228 UChar i_and_phi[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi. 
1229 // The sort key should be 6 bytes: 1230 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator, 1231 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator. 1232 uint8_t sortKey[12]; 1233 int32_t length = col->getSortKey(i_and_phi, 2, sortKey, UPRV_LENGTHOF(sortKey)); 1234 uint8_t sortKey2[12]; 1235 for (int32_t capacity = 0; capacity < length; ++capacity) { 1236 uprv_memset(sortKey2, 2, UPRV_LENGTHOF(sortKey2)); 1237 int32_t length2 = col->getSortKey(i_and_phi, 2, sortKey2, capacity); 1238 if (length2 != length || 0 != uprv_memcmp(sortKey, sortKey2, capacity)) { 1239 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity); 1240 } else if (sortKey2[capacity] != 2 || sortKey2[capacity + 1] != 2) { 1241 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity); 1242 } 1243 } 1244 1245 // Now try to break getCollationKey(). 1246 // Internally, it always starts with a large stack buffer. 1247 // Since we cannot control the initial capacity, we throw an increasing number 1248 // of characters at it, with the problematic part at the end. 1249 const int32_t longCapacity = 2000; 1250 // Each 'a' in the prefix should result in one primary sort key byte. 1251 // For i_and_phi we expect 6 bytes, then the NUL terminator. 
1252 const int32_t maxPrefixLength = longCapacity - 6 - 1; 1253 LocalArray<uint8_t> longSortKey(new uint8_t[longCapacity]); 1254 UnicodeString s(FALSE, i_and_phi, 2); 1255 for (int32_t prefixLength = 0; prefixLength < maxPrefixLength; ++prefixLength) { 1256 length = col->getSortKey(s, longSortKey.getAlias(), longCapacity); 1257 CollationKey collKey; 1258 col->getCollationKey(s, collKey, errorCode); 1259 int32_t collKeyLength; 1260 const uint8_t *collSortKey = collKey.getByteArray(collKeyLength); 1261 if (collKeyLength != length || 0 != uprv_memcmp(longSortKey.getAlias(), collSortKey, length)) { 1262 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength); 1263 } 1264 1265 // Insert an 'a' to match ++prefixLength. 1266 s.insert(prefixLength, (UChar)0x61); 1267 } 1268 } 1269 1270 void CollationAPITest::TestMaxExpansion() 1271 { 1272 UErrorCode status = U_ZERO_ERROR; 1273 UChar ch = 0; 1274 UChar32 unassigned = 0xEFFFD; 1275 uint32_t sorder = 0; 1276 uint32_t temporder = 0; 1277 1278 UnicodeString rule("&a < ab < c/aba < d < z < ch"); 1279 RuleBasedCollator coll(rule, status); 1280 if(U_FAILURE(status)) { 1281 errcheckln(status, "Collator creation failed with error %s", u_errorName(status)); 1282 return; 1283 } 1284 UnicodeString str(ch); 1285 CollationElementIterator *iter = 1286 coll.createCollationElementIterator(str); 1287 1288 while (ch < 0xFFFF && U_SUCCESS(status)) { 1289 int count = 1; 1290 uint32_t order; 1291 int32_t size = 0; 1292 1293 ch ++; 1294 1295 str.setCharAt(0, ch); 1296 iter->setText(str, status); 1297 order = iter->previous(status); 1298 1299 /* thai management */ 1300 if (order == 0) 1301 order = iter->previous(status); 1302 1303 while (U_SUCCESS(status) && iter->previous(status) != CollationElementIterator::NULLORDER) { 1304 count ++; 1305 } 1306 1307 size = coll.getMaxExpansion(order); 1308 if (U_FAILURE(status) || size < count) { 1309 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d", 
1310 ch, size, count); 1311 } 1312 } 1313 1314 /* testing for exact max expansion */ 1315 int32_t size; 1316 ch = 0; 1317 while (ch < 0x61) { 1318 uint32_t order; 1319 str.setCharAt(0, ch); 1320 iter->setText(str, status); 1321 order = iter->previous(status); 1322 size = coll.getMaxExpansion(order); 1323 if (U_FAILURE(status) || size != 1) { 1324 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d", 1325 ch, size, 1); 1326 } 1327 ch ++; 1328 } 1329 1330 ch = 0x63; 1331 str.setTo(ch); 1332 iter->setText(str, status); 1333 temporder = iter->previous(status); 1334 size = coll.getMaxExpansion(temporder); 1335 if (U_FAILURE(status) || size != 3) { 1336 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d", 1337 ch, temporder, size, 3); 1338 } 1339 1340 ch = 0x64; 1341 str.setTo(ch); 1342 iter->setText(str, status); 1343 temporder = iter->previous(status); 1344 size = coll.getMaxExpansion(temporder); 1345 if (U_FAILURE(status) || size != 1) { 1346 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d", 1347 ch, temporder, size, 1); 1348 } 1349 1350 str.setTo(unassigned); 1351 iter->setText(str, status); 1352 sorder = iter->previous(status); 1353 size = coll.getMaxExpansion(sorder); 1354 if (U_FAILURE(status) || size != 2) { 1355 errln("Failure at supplementary codepoints, maximum expansion count %d < %d", 1356 size, 2); 1357 } 1358 1359 /* testing jamo */ 1360 ch = 0x1165; 1361 str.setTo(ch); 1362 iter->setText(str, status); 1363 temporder = iter->previous(status); 1364 size = coll.getMaxExpansion(temporder); 1365 if (U_FAILURE(status) || size > 3) { 1366 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d", 1367 ch, size, 3); 1368 } 1369 1370 delete iter; 1371 1372 /* testing special jamo &a<\u1160 */ 1373 rule = CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071"); 1374 1375 RuleBasedCollator jamocoll(rule, status); 1376 iter = 
jamocoll.createCollationElementIterator(str); 1377 temporder = iter->previous(status); 1378 size = iter->getMaxExpansion(temporder); 1379 if (U_FAILURE(status) || size != 6) { 1380 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d", 1381 ch, size, 5); 1382 } 1383 1384 delete iter; 1385 } 1386 1387 void CollationAPITest::TestDisplayName() 1388 { 1389 UErrorCode error = U_ZERO_ERROR; 1390 Collator *coll = Collator::createInstance("en_US", error); 1391 if (U_FAILURE(error)) { 1392 errcheckln(error, "Failure creating english collator - %s", u_errorName(error)); 1393 return; 1394 } 1395 UnicodeString name; 1396 UnicodeString result; 1397 coll->getDisplayName(Locale::getCanadaFrench(), result); 1398 Locale::getCanadaFrench().getDisplayName(name); 1399 if (result.compare(name)) { 1400 errln("Failure getting the correct name for locale en_US"); 1401 } 1402 1403 coll->getDisplayName(Locale::getSimplifiedChinese(), result); 1404 Locale::getSimplifiedChinese().getDisplayName(name); 1405 if (result.compare(name)) { 1406 errln("Failure getting the correct name for locale zh_SG"); 1407 } 1408 delete coll; 1409 } 1410 1411 void CollationAPITest::TestAttribute() 1412 { 1413 UErrorCode error = U_ZERO_ERROR; 1414 Collator *coll = Collator::createInstance(error); 1415 1416 if (U_FAILURE(error)) { 1417 errcheckln(error, "Creation of default collator failed - %s", u_errorName(error)); 1418 return; 1419 } 1420 1421 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_OFF, error); 1422 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_OFF || 1423 U_FAILURE(error)) { 1424 errln("Setting and retrieving of the french collation failed"); 1425 } 1426 1427 coll->setAttribute(UCOL_FRENCH_COLLATION, UCOL_ON, error); 1428 if (coll->getAttribute(UCOL_FRENCH_COLLATION, error) != UCOL_ON || 1429 U_FAILURE(error)) { 1430 errln("Setting and retrieving of the french collation failed"); 1431 } 1432 1433 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, error); 1434 if 
(coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_SHIFTED || 1435 U_FAILURE(error)) { 1436 errln("Setting and retrieving of the alternate handling failed"); 1437 } 1438 1439 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, error); 1440 if (coll->getAttribute(UCOL_ALTERNATE_HANDLING, error) != UCOL_NON_IGNORABLE || 1441 U_FAILURE(error)) { 1442 errln("Setting and retrieving of the alternate handling failed"); 1443 } 1444 1445 coll->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, error); 1446 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_LOWER_FIRST || 1447 U_FAILURE(error)) { 1448 errln("Setting and retrieving of the case first attribute failed"); 1449 } 1450 1451 coll->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, error); 1452 if (coll->getAttribute(UCOL_CASE_FIRST, error) != UCOL_UPPER_FIRST || 1453 U_FAILURE(error)) { 1454 errln("Setting and retrieving of the case first attribute failed"); 1455 } 1456 1457 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, error); 1458 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_ON || 1459 U_FAILURE(error)) { 1460 errln("Setting and retrieving of the case level attribute failed"); 1461 } 1462 1463 coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, error); 1464 if (coll->getAttribute(UCOL_CASE_LEVEL, error) != UCOL_OFF || 1465 U_FAILURE(error)) { 1466 errln("Setting and retrieving of the case level attribute failed"); 1467 } 1468 1469 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, error); 1470 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_ON || 1471 U_FAILURE(error)) { 1472 errln("Setting and retrieving of the normalization on/off attribute failed"); 1473 } 1474 1475 coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, error); 1476 if (coll->getAttribute(UCOL_NORMALIZATION_MODE, error) != UCOL_OFF || 1477 U_FAILURE(error)) { 1478 errln("Setting and retrieving of the normalization on/off attribute failed"); 1479 } 1480 1481 coll->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, 
error); 1482 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_PRIMARY || 1483 U_FAILURE(error)) { 1484 errln("Setting and retrieving of the collation strength failed"); 1485 } 1486 1487 coll->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, error); 1488 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_SECONDARY || 1489 U_FAILURE(error)) { 1490 errln("Setting and retrieving of the collation strength failed"); 1491 } 1492 1493 coll->setAttribute(UCOL_STRENGTH, UCOL_TERTIARY, error); 1494 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_TERTIARY || 1495 U_FAILURE(error)) { 1496 errln("Setting and retrieving of the collation strength failed"); 1497 } 1498 1499 coll->setAttribute(UCOL_STRENGTH, UCOL_QUATERNARY, error); 1500 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_QUATERNARY || 1501 U_FAILURE(error)) { 1502 errln("Setting and retrieving of the collation strength failed"); 1503 } 1504 1505 coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, error); 1506 if (coll->getAttribute(UCOL_STRENGTH, error) != UCOL_IDENTICAL || 1507 U_FAILURE(error)) { 1508 errln("Setting and retrieving of the collation strength failed"); 1509 } 1510 1511 delete coll; 1512 } 1513 1514 void CollationAPITest::TestVariableTopSetting() { 1515 UErrorCode status = U_ZERO_ERROR; 1516 1517 UChar vt[256] = { 0 }; 1518 1519 // Use the root collator, not the default collator. 1520 // This test fails with en_US_POSIX which tailors the dollar sign after 'A'. 1521 Collator *coll = Collator::createInstance(Locale::getRoot(), status); 1522 if(U_FAILURE(status)) { 1523 delete coll; 1524 errcheckln(status, "Collator creation failed with error %s", u_errorName(status)); 1525 return; 1526 } 1527 1528 uint32_t oldVarTop = coll->getVariableTop(status); 1529 1530 // ICU 53+: The character must be in a supported reordering group, 1531 // and the variable top is pinned to the end of that group. 
1532 vt[0] = 0x0041; 1533 1534 (void)coll->setVariableTop(vt, 1, status); 1535 if(status != U_ILLEGAL_ARGUMENT_ERROR) { 1536 errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status)); 1537 } 1538 1539 status = U_ZERO_ERROR; 1540 vt[0] = 0x24; // dollar sign (currency symbol) 1541 uint32_t newVarTop = coll->setVariableTop(vt, 1, status); 1542 if(U_FAILURE(status)) { 1543 errln("setVariableTop(dollar sign) failed: %s", u_errorName(status)); 1544 return; 1545 } 1546 if(newVarTop != coll->getVariableTop(status)) { 1547 errln("setVariableTop(dollar sign) != following getVariableTop()"); 1548 } 1549 1550 UnicodeString dollar((UChar)0x24); 1551 UnicodeString euro((UChar)0x20AC); 1552 uint32_t newVarTop2 = coll->setVariableTop(euro, status); 1553 assertEquals("setVariableTop(Euro sign) == following getVariableTop()", 1554 (int64_t)newVarTop2, (int64_t)coll->getVariableTop(status)); 1555 assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)", 1556 (int64_t)newVarTop2, (int64_t)newVarTop); 1557 1558 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status); 1559 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), dollar)); 1560 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), euro)); 1561 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(dollar, UnicodeString((UChar)0x30))); 1562 1563 coll->setVariableTop(oldVarTop, status); 1564 1565 uint32_t newerVarTop = coll->setVariableTop(UnicodeString(vt, 1), status); 1566 1567 if(newVarTop != newerVarTop) { 1568 errln("Didn't set vartop properly from UnicodeString!\n"); 1569 } 1570 1571 delete coll; 1572 1573 } 1574 1575 void CollationAPITest::TestMaxVariable() { 1576 UErrorCode errorCode = U_ZERO_ERROR; 1577 LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), errorCode)); 1578 if(U_FAILURE(errorCode)) { 1579 
errcheckln(errorCode, "Collator creation failed with error %s", u_errorName(errorCode)); 1580 return; 1581 } 1582 1583 (void)coll->setMaxVariable(UCOL_REORDER_CODE_OTHERS, errorCode); 1584 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 1585 errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode)); 1586 } 1587 1588 errorCode = U_ZERO_ERROR; 1589 (void)coll->setMaxVariable(UCOL_REORDER_CODE_CURRENCY, errorCode); 1590 1591 if(UCOL_REORDER_CODE_CURRENCY != coll->getMaxVariable()) { 1592 errln("setMaxVariable(currency) != following getMaxVariable()"); 1593 } 1594 1595 coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, errorCode); 1596 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x24))); 1597 assertEquals("empty==euro", (int32_t)UCOL_EQUAL, (int32_t)coll->compare(UnicodeString(), UnicodeString((UChar)0x20AC))); 1598 assertEquals("dollar<zero", (int32_t)UCOL_LESS, (int32_t)coll->compare(UnicodeString((UChar)0x24), UnicodeString((UChar)0x30))); 1599 } 1600 1601 void CollationAPITest::TestGetLocale() { 1602 UErrorCode status = U_ZERO_ERROR; 1603 const char *rules = "&a<x<y<z"; 1604 UChar rlz[256] = {0}; 1605 1606 Collator *coll = Collator::createInstance("root", status); 1607 if(U_FAILURE(status)) { 1608 dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status)); 1609 return; 1610 } 1611 Locale locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1612 if(locale != Locale::getRoot()) { 1613 errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); " 1614 "getLocale().getName() = \"%s\"", 1615 locale.getName()); 1616 } 1617 delete coll; 1618 1619 coll = Collator::createInstance("", status); 1620 if(U_FAILURE(status)) { 1621 dataerrln("Failed to open collator for \"\" with %s", u_errorName(status)); 1622 return; 1623 } 1624 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1625 if(locale != Locale::getRoot()) { 1626 
errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); " 1627 "getLocale().getName() = \"%s\"", 1628 locale.getName()); 1629 } 1630 delete coll; 1631 1632 int32_t i = 0; 1633 1634 static const struct { 1635 const char* requestedLocale; 1636 const char* validLocale; 1637 const char* actualLocale; 1638 } testStruct[] = { 1639 // Note: Locale::getRoot().getName() == "" not "root". 1640 { "de_DE", "de", "" }, 1641 { "sr_RS", "sr_Cyrl_RS", "sr" }, 1642 { "en_US_CALIFORNIA", "en_US", "" }, 1643 { "fr_FR_NONEXISTANT", "fr", "" }, 1644 // pinyin is the default, therefore suppressed. 1645 { "zh_CN", "zh_Hans_CN", "zh" }, 1646 // zh_Hant has default=stroke but the data is in zh. 1647 { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" }, 1648 { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" }, 1649 { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" } 1650 }; 1651 1652 u_unescape(rules, rlz, 256); 1653 1654 /* test opening collators for different locales */ 1655 for(i = 0; i<UPRV_LENGTHOF(testStruct); i++) { 1656 status = U_ZERO_ERROR; 1657 coll = Collator::createInstance(testStruct[i].requestedLocale, status); 1658 if(U_FAILURE(status)) { 1659 errln("Failed to open collator for %s with %s", testStruct[i].requestedLocale, u_errorName(status)); 1660 delete coll; 1661 continue; 1662 } 1663 // The requested locale may be the same as the valid locale, 1664 // or may not be supported at all. See ticket #10477. 
1665 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status); 1666 if(U_SUCCESS(status) && 1667 locale != testStruct[i].requestedLocale && locale != testStruct[i].validLocale) { 1668 errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s", 1669 testStruct[i].requestedLocale, 1670 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName()); 1671 } 1672 status = U_ZERO_ERROR; 1673 locale = coll->getLocale(ULOC_VALID_LOCALE, status); 1674 if(locale != testStruct[i].validLocale) { 1675 errln("[Coll %s]: Error in valid locale, expected %s, got %s", 1676 testStruct[i].requestedLocale, testStruct[i].validLocale, locale.getName()); 1677 } 1678 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1679 if(locale != testStruct[i].actualLocale) { 1680 errln("[Coll %s]: Error in actual locale, expected %s, got %s", 1681 testStruct[i].requestedLocale, testStruct[i].actualLocale, locale.getName()); 1682 } 1683 // If we open a collator for the actual locale, we should get an equivalent one again. 
1684 LocalPointer<Collator> coll2(Collator::createInstance(locale, status)); 1685 if(U_FAILURE(status)) { 1686 errln("Failed to open collator for actual locale \"%s\" with %s", 1687 locale.getName(), u_errorName(status)); 1688 } else { 1689 Locale actual2 = coll2->getLocale(ULOC_ACTUAL_LOCALE, status); 1690 if(actual2 != locale) { 1691 errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"", 1692 locale.getName(), actual2.getName()); 1693 } 1694 if(*coll2 != *coll) { 1695 errln("[Coll actual \"%s\"]: Got different collator than before", locale.getName()); 1696 } 1697 } 1698 delete coll; 1699 } 1700 1701 /* completely non-existent locale for collator should get a root collator */ 1702 { 1703 LocalPointer<Collator> coll(Collator::createInstance("blahaha", status)); 1704 if(U_FAILURE(status)) { 1705 errln("Failed to open collator with %s", u_errorName(status)); 1706 return; 1707 } 1708 Locale valid = coll->getLocale(ULOC_VALID_LOCALE, status); 1709 const char *name = valid.getName(); 1710 if(*name != 0 && strcmp(name, "root") != 0) { 1711 errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name); 1712 } 1713 Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1714 name = actual.getName(); 1715 if(*name != 0 && strcmp(name, "root") != 0) { 1716 errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name); 1717 } 1718 } 1719 1720 1721 1722 /* collator instantiated from rules should have all three locales NULL */ 1723 coll = new RuleBasedCollator(rlz, status); 1724 locale = coll->getLocale(ULOC_REQUESTED_LOCALE, status); 1725 if(U_SUCCESS(status) && !locale.isBogus()) { 1726 errln("For collator instantiated from rules, requested locale %s is not bogus", locale.getName()); 1727 } 1728 status = U_ZERO_ERROR; 1729 locale = coll->getLocale(ULOC_VALID_LOCALE, status); 1730 if(!locale.isBogus()) { 1731 errln("For collator instantiated from rules, valid locale %s is not bogus", locale.getName()); 1732 } 
1733 locale = coll->getLocale(ULOC_ACTUAL_LOCALE, status); 1734 if(!locale.isBogus()) { 1735 errln("For collator instantiated from rules, actual locale %s is not bogus", locale.getName()); 1736 } 1737 delete coll; 1738 } 1739 1740 struct teststruct { 1741 const char *original; 1742 uint8_t key[256]; 1743 }; 1744 1745 1746 1747 U_CDECL_BEGIN 1748 static int U_CALLCONV 1749 compare_teststruct(const void *string1, const void *string2) { 1750 return(strcmp((const char *)((struct teststruct *)string1)->key, (const char *)((struct teststruct *)string2)->key)); 1751 } 1752 U_CDECL_END 1753 1754 1755 void CollationAPITest::TestBounds(void) { 1756 UErrorCode status = U_ZERO_ERROR; 1757 1758 Collator *coll = Collator::createInstance(Locale("sh"), status); 1759 if(U_FAILURE(status)) { 1760 delete coll; 1761 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 1762 return; 1763 } 1764 1765 uint8_t sortkey[512], lower[512], upper[512]; 1766 UChar buffer[512]; 1767 1768 static const char * const test[] = { 1769 "John Smith", 1770 "JOHN SMITH", 1771 "john SMITH", 1772 "j\\u00F6hn sm\\u00EFth", 1773 "J\\u00F6hn Sm\\u00EFth", 1774 "J\\u00D6HN SM\\u00CFTH", 1775 "john smithsonian", 1776 "John Smithsonian" 1777 }; 1778 1779 struct teststruct tests[] = { 1780 {"\\u010CAKI MIHALJ", {0}}, 1781 {"\\u010CAKI MIHALJ", {0}}, 1782 {"\\u010CAKI PIRO\\u0160KA", {0}}, 1783 {"\\u010CABAI ANDRIJA", {0}}, 1784 {"\\u010CABAI LAJO\\u0160", {0}}, 1785 {"\\u010CABAI MARIJA", {0}}, 1786 {"\\u010CABAI STEVAN", {0}}, 1787 {"\\u010CABAI STEVAN", {0}}, 1788 {"\\u010CABARKAPA BRANKO", {0}}, 1789 {"\\u010CABARKAPA MILENKO", {0}}, 1790 {"\\u010CABARKAPA MIROSLAV", {0}}, 1791 {"\\u010CABARKAPA SIMO", {0}}, 1792 {"\\u010CABARKAPA STANKO", {0}}, 1793 {"\\u010CABARKAPA TAMARA", {0}}, 1794 {"\\u010CABARKAPA TOMA\\u0160", {0}}, 1795 {"\\u010CABDARI\\u0106 NIKOLA", {0}}, 1796 {"\\u010CABDARI\\u0106 ZORICA", {0}}, 1797 {"\\u010CABI NANDOR", {0}}, 1798 {"\\u010CABOVI\\u0106 MILAN", {0}}, 1799 
{"\\u010CABRADI AGNEZIJA", {0}}, 1800 {"\\u010CABRADI IVAN", {0}}, 1801 {"\\u010CABRADI JELENA", {0}}, 1802 {"\\u010CABRADI LJUBICA", {0}}, 1803 {"\\u010CABRADI STEVAN", {0}}, 1804 {"\\u010CABRDA MARTIN", {0}}, 1805 {"\\u010CABRILO BOGDAN", {0}}, 1806 {"\\u010CABRILO BRANISLAV", {0}}, 1807 {"\\u010CABRILO LAZAR", {0}}, 1808 {"\\u010CABRILO LJUBICA", {0}}, 1809 {"\\u010CABRILO SPASOJA", {0}}, 1810 {"\\u010CADE\\u0160 ZDENKA", {0}}, 1811 {"\\u010CADESKI BLAGOJE", {0}}, 1812 {"\\u010CADOVSKI VLADIMIR", {0}}, 1813 {"\\u010CAGLJEVI\\u0106 TOMA", {0}}, 1814 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}}, 1815 {"\\u010CAJA VANKA", {0}}, 1816 {"\\u010CAJI\\u0106 BOGOLJUB", {0}}, 1817 {"\\u010CAJI\\u0106 BORISLAV", {0}}, 1818 {"\\u010CAJI\\u0106 RADOSLAV", {0}}, 1819 {"\\u010CAK\\u0160IRAN MILADIN", {0}}, 1820 {"\\u010CAKAN EUGEN", {0}}, 1821 {"\\u010CAKAN EVGENIJE", {0}}, 1822 {"\\u010CAKAN IVAN", {0}}, 1823 {"\\u010CAKAN JULIJAN", {0}}, 1824 {"\\u010CAKAN MIHAJLO", {0}}, 1825 {"\\u010CAKAN STEVAN", {0}}, 1826 {"\\u010CAKAN VLADIMIR", {0}}, 1827 {"\\u010CAKAN VLADIMIR", {0}}, 1828 {"\\u010CAKAN VLADIMIR", {0}}, 1829 {"\\u010CAKARA ANA", {0}}, 1830 {"\\u010CAKAREVI\\u0106 MOMIR", {0}}, 1831 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}}, 1832 {"\\u010CAKI \\u0160ANDOR", {0}}, 1833 {"\\u010CAKI AMALIJA", {0}}, 1834 {"\\u010CAKI ANDRA\\u0160", {0}}, 1835 {"\\u010CAKI LADISLAV", {0}}, 1836 {"\\u010CAKI LAJO\\u0160", {0}}, 1837 {"\\u010CAKI LASLO", {0}} 1838 }; 1839 1840 1841 1842 int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0; 1843 int32_t arraySize = UPRV_LENGTHOF(tests); 1844 1845 (void)lowerSize; // Suppress unused variable warnings. 
1846 (void)upperSize; 1847 1848 for(i = 0; i<arraySize; i++) { 1849 buffSize = u_unescape(tests[i].original, buffer, 512); 1850 skSize = coll->getSortKey(buffer, buffSize, tests[i].key, 512); 1851 } 1852 1853 qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct); 1854 1855 for(i = 0; i < arraySize-1; i++) { 1856 for(j = i+1; j < arraySize; j++) { 1857 lowerSize = coll->getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, status); 1858 upperSize = coll->getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, status); 1859 for(k = i; k <= j; k++) { 1860 if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) { 1861 errln("Problem with lower! j = %i (%s vs %s)", k, tests[k].original, tests[i].original); 1862 } 1863 if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) { 1864 errln("Problem with upper! j = %i (%s vs %s)", k, tests[k].original, tests[j].original); 1865 } 1866 } 1867 } 1868 } 1869 1870 1871 for(i = 0; i<UPRV_LENGTHOF(test); i++) { 1872 buffSize = u_unescape(test[i], buffer, 512); 1873 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512); 1874 lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status); 1875 upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status); 1876 for(j = i+1; j<UPRV_LENGTHOF(test); j++) { 1877 buffSize = u_unescape(test[j], buffer, 512); 1878 skSize = coll->getSortKey(buffer, buffSize, sortkey, 512); 1879 if(strcmp((const char *)lower, (const char *)sortkey) > 0) { 1880 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]); 1881 } 1882 if(strcmp((const char *)upper, (const char *)sortkey) <= 0) { 1883 errln("Problem with upper! 
i = %i, j = %i (%s vs %s)", i, j, test[i], test[j]); 1884 } 1885 } 1886 } 1887 delete coll; 1888 } 1889 1890 1891 void CollationAPITest::TestGetTailoredSet() 1892 { 1893 struct { 1894 const char *rules; 1895 const char *tests[20]; 1896 int32_t testsize; 1897 } setTest[] = { 1898 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3}, 1899 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4} 1900 }; 1901 1902 int32_t i = 0, j = 0; 1903 UErrorCode status = U_ZERO_ERROR; 1904 1905 UnicodeString buff; 1906 UnicodeSet *set = NULL; 1907 1908 for(i = 0; i < UPRV_LENGTHOF(setTest); i++) { 1909 buff = UnicodeString(setTest[i].rules, -1, US_INV).unescape(); 1910 RuleBasedCollator coll(buff, status); 1911 if(U_SUCCESS(status)) { 1912 set = coll.getTailoredSet(status); 1913 if(set->size() < setTest[i].testsize) { 1914 errln("Tailored set size smaller (%d) than expected (%d)", set->size(), setTest[i].testsize); 1915 } 1916 for(j = 0; j < setTest[i].testsize; j++) { 1917 buff = UnicodeString(setTest[i].tests[j], -1, US_INV).unescape(); 1918 if(!set->contains(buff)) { 1919 errln("Tailored set doesn't contain %s... 
It should", setTest[i].tests[j]); 1920 } 1921 } 1922 delete set; 1923 } else { 1924 errcheckln(status, "Couldn't open collator with rules %s - %s", setTest[i].rules, u_errorName(status)); 1925 } 1926 } 1927 } 1928 1929 void CollationAPITest::TestUClassID() 1930 { 1931 char id = *((char *)RuleBasedCollator::getStaticClassID()); 1932 if (id != 0) { 1933 errln("Static class id for RuleBasedCollator should be 0"); 1934 } 1935 UErrorCode status = U_ZERO_ERROR; 1936 RuleBasedCollator *coll 1937 = (RuleBasedCollator *)Collator::createInstance(status); 1938 if(U_FAILURE(status)) { 1939 delete coll; 1940 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 1941 return; 1942 } 1943 id = *((char *)coll->getDynamicClassID()); 1944 if (id != 0) { 1945 errln("Dynamic class id for RuleBasedCollator should be 0"); 1946 } 1947 id = *((char *)CollationKey::getStaticClassID()); 1948 if (id != 0) { 1949 errln("Static class id for CollationKey should be 0"); 1950 } 1951 CollationKey *key = new CollationKey(); 1952 id = *((char *)key->getDynamicClassID()); 1953 if (id != 0) { 1954 errln("Dynamic class id for CollationKey should be 0"); 1955 } 1956 id = *((char *)CollationElementIterator::getStaticClassID()); 1957 if (id != 0) { 1958 errln("Static class id for CollationElementIterator should be 0"); 1959 } 1960 UnicodeString str("testing"); 1961 CollationElementIterator *iter = coll->createCollationElementIterator(str); 1962 id = *((char *)iter->getDynamicClassID()); 1963 if (id != 0) { 1964 errln("Dynamic class id for CollationElementIterator should be 0"); 1965 } 1966 delete key; 1967 delete iter; 1968 delete coll; 1969 } 1970 1971 class TestCollator : public Collator 1972 { 1973 public: 1974 virtual Collator* clone(void) const; 1975 1976 using Collator::compare; 1977 1978 virtual UCollationResult compare(const UnicodeString& source, 1979 const UnicodeString& target, 1980 UErrorCode& status) const; 1981 virtual UCollationResult compare(const UnicodeString& 
source, 1982 const UnicodeString& target, 1983 int32_t length, 1984 UErrorCode& status) const; 1985 virtual UCollationResult compare(const UChar* source, 1986 int32_t sourceLength, 1987 const UChar* target, 1988 int32_t targetLength, 1989 UErrorCode& status) const; 1990 virtual CollationKey& getCollationKey(const UnicodeString& source, 1991 CollationKey& key, 1992 UErrorCode& status) const; 1993 virtual CollationKey& getCollationKey(const UChar*source, 1994 int32_t sourceLength, 1995 CollationKey& key, 1996 UErrorCode& status) const; 1997 virtual int32_t hashCode(void) const; 1998 virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; 1999 virtual ECollationStrength getStrength(void) const; 2000 virtual void setStrength(ECollationStrength newStrength); 2001 virtual UClassID getDynamicClassID(void) const; 2002 virtual void getVersion(UVersionInfo info) const; 2003 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, 2004 UErrorCode &status); 2005 virtual UColAttributeValue getAttribute(UColAttribute attr, 2006 UErrorCode &status) const; 2007 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, 2008 UErrorCode &status); 2009 virtual uint32_t setVariableTop(const UnicodeString &varTop, 2010 UErrorCode &status); 2011 virtual void setVariableTop(uint32_t varTop, UErrorCode &status); 2012 virtual uint32_t getVariableTop(UErrorCode &status) const; 2013 virtual int32_t getSortKey(const UnicodeString& source, 2014 uint8_t* result, 2015 int32_t resultLength) const; 2016 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, 2017 uint8_t*result, int32_t resultLength) const; 2018 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; 2019 virtual UBool operator==(const Collator& other) const; 2020 // Collator::operator!= calls !Collator::operator== which works for all subclasses. 
2021 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); 2022 TestCollator() : Collator() {}; 2023 TestCollator(UCollationStrength collationStrength, 2024 UNormalizationMode decompositionMode) : Collator(collationStrength, decompositionMode) {}; 2025 }; 2026 2027 inline UBool TestCollator::operator==(const Collator& other) const { 2028 // TestCollator has no fields, so we test for identity. 2029 return this == &other; 2030 2031 // Normally, subclasses should do something like the following: 2032 // if (this == &other) { return TRUE; } 2033 // if (!Collator::operator==(other)) { return FALSE; } // not the same class 2034 // 2035 // const TestCollator &o = (const TestCollator&)other; 2036 // (compare this vs. o's subclass fields) 2037 } 2038 2039 Collator* TestCollator::clone() const 2040 { 2041 return new TestCollator(); 2042 } 2043 2044 UCollationResult TestCollator::compare(const UnicodeString& source, 2045 const UnicodeString& target, 2046 UErrorCode& status) const 2047 { 2048 if(U_SUCCESS(status)) { 2049 return UCollationResult(source.compare(target)); 2050 } else { 2051 return UCOL_EQUAL; 2052 } 2053 } 2054 2055 UCollationResult TestCollator::compare(const UnicodeString& source, 2056 const UnicodeString& target, 2057 int32_t length, 2058 UErrorCode& status) const 2059 { 2060 if(U_SUCCESS(status)) { 2061 return UCollationResult(source.compare(0, length, target)); 2062 } else { 2063 return UCOL_EQUAL; 2064 } 2065 } 2066 2067 UCollationResult TestCollator::compare(const UChar* source, 2068 int32_t sourceLength, 2069 const UChar* target, 2070 int32_t targetLength, 2071 UErrorCode& status) const 2072 { 2073 UnicodeString s(source, sourceLength); 2074 UnicodeString t(target, targetLength); 2075 return compare(s, t, status); 2076 } 2077 2078 CollationKey& TestCollator::getCollationKey(const UnicodeString& source, 2079 CollationKey& key, 2080 UErrorCode& status) const 2081 { 2082 char temp[100]; 2083 int length 
= 100; 2084 length = source.extract(temp, length, NULL, status); 2085 temp[length] = 0; 2086 CollationKey tempkey((uint8_t*)temp, length); 2087 key = tempkey; 2088 return key; 2089 } 2090 2091 CollationKey& TestCollator::getCollationKey(const UChar*source, 2092 int32_t sourceLength, 2093 CollationKey& key, 2094 UErrorCode& status) const 2095 { 2096 //s tack allocation used since collationkey does not keep the unicodestring 2097 UnicodeString str(source, sourceLength); 2098 return getCollationKey(str, key, status); 2099 } 2100 2101 int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result, 2102 int32_t resultLength) const 2103 { 2104 UErrorCode status = U_ZERO_ERROR; 2105 int32_t length = source.extract((char *)result, resultLength, NULL, 2106 status); 2107 result[length] = 0; 2108 return length; 2109 } 2110 2111 int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength, 2112 uint8_t*result, int32_t resultLength) const 2113 { 2114 UnicodeString str(source, sourceLength); 2115 return getSortKey(str, result, resultLength); 2116 } 2117 2118 int32_t TestCollator::hashCode() const 2119 { 2120 return 0; 2121 } 2122 2123 Locale TestCollator::getLocale(ULocDataLocaleType type, UErrorCode& status) const 2124 { 2125 // api not used, this is to make the compiler happy 2126 if (U_FAILURE(status)) { 2127 (void)type; 2128 } 2129 return NULL; 2130 } 2131 2132 Collator::ECollationStrength TestCollator::getStrength() const 2133 { 2134 return TERTIARY; 2135 } 2136 2137 void TestCollator::setStrength(Collator::ECollationStrength newStrength) 2138 { 2139 // api not used, this is to make the compiler happy 2140 (void)newStrength; 2141 } 2142 2143 UClassID TestCollator::getDynamicClassID(void) const 2144 { 2145 return 0; 2146 } 2147 2148 void TestCollator::getVersion(UVersionInfo info) const 2149 { 2150 // api not used, this is to make the compiler happy 2151 memset(info, 0, U_MAX_VERSION_LENGTH); 2152 } 2153 2154 void 
TestCollator::setAttribute(UColAttribute /*attr*/, UColAttributeValue /*value*/, 2155 UErrorCode & /*status*/) 2156 { 2157 } 2158 2159 UColAttributeValue TestCollator::getAttribute(UColAttribute attr, 2160 UErrorCode &status) const 2161 { 2162 // api not used, this is to make the compiler happy 2163 if (U_FAILURE(status) || attr == UCOL_ATTRIBUTE_COUNT) { 2164 return UCOL_OFF; 2165 } 2166 return UCOL_DEFAULT; 2167 } 2168 2169 uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len, 2170 UErrorCode &status) 2171 { 2172 // api not used, this is to make the compiler happy 2173 if (U_SUCCESS(status) && (varTop == 0 || len < -1)) { 2174 status = U_ILLEGAL_ARGUMENT_ERROR; 2175 } 2176 return 0; 2177 } 2178 2179 uint32_t TestCollator::setVariableTop(const UnicodeString &varTop, 2180 UErrorCode &status) 2181 { 2182 // api not used, this is to make the compiler happy 2183 if (U_SUCCESS(status) && varTop.length() == 0) { 2184 status = U_ILLEGAL_ARGUMENT_ERROR; 2185 } 2186 return 0; 2187 } 2188 2189 void TestCollator::setVariableTop(uint32_t varTop, UErrorCode &status) 2190 { 2191 // api not used, this is to make the compiler happy 2192 if (U_SUCCESS(status) && varTop == 0) { 2193 status = U_ILLEGAL_ARGUMENT_ERROR; 2194 } 2195 } 2196 2197 uint32_t TestCollator::getVariableTop(UErrorCode &status) const 2198 { 2199 2200 // api not used, this is to make the compiler happy 2201 if (U_SUCCESS(status)) { 2202 return 0; 2203 } 2204 return (uint32_t)(0xFFFFFFFFu); 2205 } 2206 2207 UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const 2208 { 2209 return Collator::getTailoredSet(status); 2210 } 2211 2212 void TestCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) 2213 { 2214 Collator::setLocales(requestedLocale, validLocale, actualLocale); 2215 } 2216 2217 2218 void CollationAPITest::TestSubclass() 2219 { 2220 TestCollator col1; 2221 TestCollator col2; 2222 doAssert(col1 != col2, "2 instances of 
TestCollator should be different"); 2223 if (col1.hashCode() != col2.hashCode()) { 2224 errln("Every TestCollator has the same hashcode"); 2225 } 2226 UnicodeString abc("abc", 3); 2227 UnicodeString bcd("bcd", 3); 2228 if (col1.compare(abc, bcd) != abc.compare(bcd)) { 2229 errln("TestCollator compare should be the same as the default " 2230 "string comparison"); 2231 } 2232 CollationKey key; 2233 UErrorCode status = U_ZERO_ERROR; 2234 col1.getCollationKey(abc, key, status); 2235 int32_t length = 0; 2236 const char* bytes = (const char *)key.getByteArray(length); 2237 UnicodeString keyarray(bytes, length, NULL, status); 2238 if (abc != keyarray) { 2239 errln("TestCollator collationkey API is returning wrong values"); 2240 } 2241 2242 UnicodeSet expectedset(0, 0x10FFFF); 2243 UnicodeSet *defaultset = col1.getTailoredSet(status); 2244 if (!defaultset->containsAll(expectedset) 2245 || !expectedset.containsAll(*defaultset)) { 2246 errln("Error: expected default tailoring to be 0 to 0x10ffff"); 2247 } 2248 delete defaultset; 2249 2250 // use base class implementation 2251 Locale loc1 = Locale::getGermany(); 2252 Locale loc2 = Locale::getFrance(); 2253 col1.setLocales(loc1, loc2, loc2); // default implementation has no effect 2254 2255 UnicodeString displayName; 2256 col1.getDisplayName(loc1, loc2, displayName); // de_DE collator in fr_FR locale 2257 2258 TestCollator col3(UCOL_TERTIARY, UNORM_NONE); 2259 UnicodeString a("a"); 2260 UnicodeString b("b"); 2261 Collator::EComparisonResult result = Collator::EComparisonResult(a.compare(b)); 2262 if(col1.compare(a, b) != result) { 2263 errln("Collator doesn't give default result"); 2264 } 2265 if(col1.compare(a, b, 1) != result) { 2266 errln("Collator doesn't give default result"); 2267 } 2268 if(col1.compare(a.getBuffer(), a.length(), b.getBuffer(), b.length()) != result) { 2269 errln("Collator doesn't give default result"); 2270 } 2271 } 2272 2273 void CollationAPITest::TestNULLCharTailoring() 2274 { 2275 UErrorCode status = 
U_ZERO_ERROR; 2276 UChar buf[256] = {0}; 2277 int32_t len = u_unescape("&a < '\\u0000'", buf, 256); 2278 UnicodeString first((UChar)0x0061); 2279 UnicodeString second((UChar)0); 2280 RuleBasedCollator *coll = new RuleBasedCollator(UnicodeString(buf, len), status); 2281 if(U_FAILURE(status)) { 2282 delete coll; 2283 errcheckln(status, "Failed to open collator - %s", u_errorName(status)); 2284 return; 2285 } 2286 UCollationResult res = coll->compare(first, second, status); 2287 if(res != UCOL_LESS) { 2288 errln("a should be less then NULL after tailoring"); 2289 } 2290 delete coll; 2291 } 2292 2293 void CollationAPITest::TestClone() { 2294 logln("\ninit c0"); 2295 UErrorCode status = U_ZERO_ERROR; 2296 RuleBasedCollator* c0 = (RuleBasedCollator*)Collator::createInstance(status); 2297 2298 if (U_FAILURE(status)) { 2299 errcheckln(status, "Collator::CreateInstance(status) failed with %s", u_errorName(status)); 2300 return; 2301 } 2302 2303 c0->setStrength(Collator::TERTIARY); 2304 dump("c0", c0, status); 2305 2306 logln("\ninit c1"); 2307 RuleBasedCollator* c1 = (RuleBasedCollator*)Collator::createInstance(status); 2308 c1->setStrength(Collator::TERTIARY); 2309 UColAttributeValue val = c1->getAttribute(UCOL_CASE_FIRST, status); 2310 if(val == UCOL_LOWER_FIRST){ 2311 c1->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); 2312 }else{ 2313 c1->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); 2314 } 2315 dump("c0", c0, status); 2316 dump("c1", c1, status); 2317 2318 logln("\ninit c2"); 2319 RuleBasedCollator* c2 = (RuleBasedCollator*)c1->clone(); 2320 val = c2->getAttribute(UCOL_CASE_FIRST, status); 2321 if(val == UCOL_LOWER_FIRST){ 2322 c2->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); 2323 }else{ 2324 c2->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); 2325 } 2326 if(U_FAILURE(status)){ 2327 errln("set and get attributes of collator failed. 
%s\n", u_errorName(status)); 2328 return; 2329 } 2330 dump("c0", c0, status); 2331 dump("c1", c1, status); 2332 dump("c2", c2, status); 2333 if(*c1 == *c2){ 2334 errln("The cloned objects refer to same data"); 2335 } 2336 delete c0; 2337 delete c1; 2338 delete c2; 2339 } 2340 2341 void CollationAPITest::TestCloneBinary() { 2342 IcuTestErrorCode errorCode(*this, "TestCloneBinary"); 2343 LocalPointer<Collator> root(Collator::createInstance(Locale::getRoot(), errorCode)); 2344 LocalPointer<Collator> coll(Collator::createInstance("de@collation=phonebook", errorCode)); 2345 if(errorCode.logDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) { 2346 return; 2347 } 2348 RuleBasedCollator *rbRoot = dynamic_cast<RuleBasedCollator *>(root.getAlias()); 2349 RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias()); 2350 if(rbRoot == NULL || rbc == NULL) { 2351 infoln("root or de@collation=phonebook is not a RuleBasedCollator"); 2352 return; 2353 } 2354 rbc->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, errorCode); 2355 UnicodeString uUmlaut((UChar)0xfc); 2356 UnicodeString ue = UNICODE_STRING_SIMPLE("ue"); 2357 assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc->compare(uUmlaut, ue, errorCode)); 2358 uint8_t bin[25000]; 2359 int32_t binLength = rbc->cloneBinary(bin, UPRV_LENGTHOF(bin), errorCode); 2360 if(errorCode.logDataIfFailureAndReset("rbc->cloneBinary()")) { 2361 return; 2362 } 2363 logln("rbc->cloneBinary() -> %d bytes", (int)binLength); 2364 2365 RuleBasedCollator rbc2(bin, binLength, rbRoot, errorCode); 2366 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) { 2367 return; 2368 } 2369 assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY, rbc2.getAttribute(UCOL_STRENGTH, errorCode)); 2370 assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc2.compare(uUmlaut, ue, errorCode)); 2371 assertTrue("rbc==rbc2", *rbc == rbc2); 2372 uint8_t bin2[25000]; 2373 int32_t bin2Length = 
rbc2.cloneBinary(bin2, UPRV_LENGTHOF(bin2), errorCode); 2374 assertEquals("len(rbc binary)==len(rbc2 binary)", binLength, bin2Length); 2375 assertTrue("rbc binary==rbc2 binary", binLength == bin2Length && memcmp(bin, bin2, binLength) == 0); 2376 2377 RuleBasedCollator rbc3(bin, -1, rbRoot, errorCode); 2378 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) { 2379 return; 2380 } 2381 assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY, rbc3.getAttribute(UCOL_STRENGTH, errorCode)); 2382 assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL, rbc3.compare(uUmlaut, ue, errorCode)); 2383 assertTrue("rbc==rbc3", *rbc == rbc3); 2384 } 2385 2386 void CollationAPITest::TestIterNumeric() { 2387 // Regression test for ticket #9915. 2388 // The collation code sometimes masked the continuation marker away 2389 // but later tested the result for isContinuation(). 2390 // This test case failed because the third bytes of the computed numeric-collation primaries 2391 // were permutated with the script reordering table. 2392 // It should have been possible to reproduce this with the root collator 2393 // and characters with appropriate 3-byte primary weights. 2394 // The effectiveness of this test depends completely on the collation elements 2395 // and on the implementation code. 2396 IcuTestErrorCode errorCode(*this, "TestIterNumeric"); 2397 RuleBasedCollator coll(UnicodeString("[reorder Hang Hani]"), errorCode); 2398 if(errorCode.logDataIfFailureAndReset("RuleBasedCollator constructor")) { 2399 return; 2400 } 2401 coll.setAttribute(UCOL_NUMERIC_COLLATION, UCOL_ON, errorCode); 2402 UCharIterator iter40, iter72; 2403 uiter_setUTF8(&iter40, "\x34\x30", 2); 2404 uiter_setUTF8(&iter72, "\x37\x32", 2); 2405 UCollationResult result = coll.compare(iter40, iter72, errorCode); 2406 assertEquals("40<72", (int32_t)UCOL_LESS, (int32_t)result); 2407 } 2408 2409 void CollationAPITest::TestBadKeywords() { 2410 // Test locale IDs with errors. 
2411 // Valid locale IDs are tested via data-driven tests. 2412 UErrorCode errorCode = U_ZERO_ERROR; 2413 Locale bogusLocale(Locale::getRoot()); 2414 bogusLocale.setToBogus(); 2415 LocalPointer<Collator> coll(Collator::createInstance(bogusLocale, errorCode)); 2416 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2417 errln("Collator::createInstance(bogus locale) did not fail as expected - %s", 2418 u_errorName(errorCode)); 2419 } 2420 2421 // Unknown value. 2422 const char *localeID = "it-u-ks-xyz"; 2423 errorCode = U_ZERO_ERROR; 2424 coll.adoptInstead(Collator::createInstance(localeID, errorCode)); 2425 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2426 dataerrln("Collator::createInstance(%s) did not fail as expected - %s", 2427 localeID, u_errorName(errorCode)); 2428 } 2429 2430 // Unsupported attributes. 2431 localeID = "it@colHiraganaQuaternary=true"; 2432 errorCode = U_ZERO_ERROR; 2433 coll.adoptInstead(Collator::createInstance(localeID, errorCode)); 2434 if(errorCode != U_UNSUPPORTED_ERROR) { 2435 if (errorCode == U_FILE_ACCESS_ERROR) { 2436 dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode)); 2437 } else { 2438 errln("Collator::createInstance(%s) did not fail as expected - %s", 2439 localeID, u_errorName(errorCode)); 2440 } 2441 } 2442 2443 localeID = "it-u-vt-u24"; 2444 errorCode = U_ZERO_ERROR; 2445 coll.adoptInstead(Collator::createInstance(localeID, errorCode)); 2446 if(errorCode != U_UNSUPPORTED_ERROR) { 2447 if (errorCode == U_ILLEGAL_ARGUMENT_ERROR || errorCode == U_FILE_ACCESS_ERROR) { 2448 dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode)); 2449 } else { 2450 errln("Collator::createInstance(%s) did not fail as expected - %s", 2451 localeID, u_errorName(errorCode)); 2452 } 2453 } 2454 } 2455 2456 void CollationAPITest::dump(UnicodeString msg, RuleBasedCollator* c, UErrorCode& status) { 2457 const char* bigone = "One"; 2458 const char* littleone = "one"; 2459 2460 logln(msg + " " + 
c->compare(bigone, littleone) + 2461 " s: " + c->getStrength() + 2462 " u: " + c->getAttribute(UCOL_CASE_FIRST, status)); 2463 } 2464 void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) 2465 { 2466 if (exec) logln("TestSuite CollationAPITest: "); 2467 TESTCASE_AUTO_BEGIN; 2468 TESTCASE_AUTO(TestProperty); 2469 TESTCASE_AUTO(TestOperators); 2470 TESTCASE_AUTO(TestDuplicate); 2471 TESTCASE_AUTO(TestCompare); 2472 TESTCASE_AUTO(TestHashCode); 2473 TESTCASE_AUTO(TestCollationKey); 2474 TESTCASE_AUTO(TestElemIter); 2475 TESTCASE_AUTO(TestGetAll); 2476 TESTCASE_AUTO(TestRuleBasedColl); 2477 TESTCASE_AUTO(TestDecomposition); 2478 TESTCASE_AUTO(TestSafeClone); 2479 TESTCASE_AUTO(TestSortKey); 2480 TESTCASE_AUTO(TestSortKeyOverflow); 2481 TESTCASE_AUTO(TestMaxExpansion); 2482 TESTCASE_AUTO(TestDisplayName); 2483 TESTCASE_AUTO(TestAttribute); 2484 TESTCASE_AUTO(TestVariableTopSetting); 2485 TESTCASE_AUTO(TestMaxVariable); 2486 TESTCASE_AUTO(TestRules); 2487 TESTCASE_AUTO(TestGetLocale); 2488 TESTCASE_AUTO(TestBounds); 2489 TESTCASE_AUTO(TestGetTailoredSet); 2490 TESTCASE_AUTO(TestUClassID); 2491 TESTCASE_AUTO(TestSubclass); 2492 TESTCASE_AUTO(TestNULLCharTailoring); 2493 TESTCASE_AUTO(TestClone); 2494 TESTCASE_AUTO(TestCloneBinary); 2495 TESTCASE_AUTO(TestIterNumeric); 2496 TESTCASE_AUTO(TestBadKeywords); 2497 TESTCASE_AUTO_END; 2498 } 2499 2500 #endif /* #if !UCONFIG_NO_COLLATION */ 2501