1 /* 2 ****************************************************************************** 3 * Copyright (C) 1996-2010, International Business Machines * 4 * Corporation and others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 8 /** 9 * \file 10 * \brief C++ API: Collation Service. 11 */ 12 13 /** 14 * File coll.h 15 * 16 * Created by: Helena Shih 17 * 18 * Modification History: 19 * 20 * Date Name Description 21 * 02/5/97 aliu Modified createDefault to load collation data from 22 * binary files when possible. Added related methods 23 * createCollationFromFile, chopLocale, createPathName. 24 * 02/11/97 aliu Added members addToCache, findInCache, and fgCache. 25 * 02/12/97 aliu Modified to create objects from RuleBasedCollator cache. 26 * Moved cache out of Collation class. 27 * 02/13/97 aliu Moved several methods out of this class and into 28 * RuleBasedCollator, with modifications. Modified 29 * createDefault() to call new RuleBasedCollator(Locale&) 30 * constructor. General clean up and documentation. 31 * 02/20/97 helena Added clone, operator==, operator!=, operator=, copy 32 * constructor and getDynamicClassID. 33 * 03/25/97 helena Updated with platform independent data types. 34 * 05/06/97 helena Added memory allocation error detection. 35 * 06/20/97 helena Java class name change. 36 * 09/03/97 helena Added createCollationKeyValues(). 37 * 02/10/98 damiba Added compare() with length as parameter. 38 * 04/23/99 stephen Removed EDecompositionMode, merged with 39 * Normalizer::EMode. 40 * 11/02/99 helena Collator performance enhancements. Eliminates the 41 * UnicodeString construction and special case for NO_OP. 42 * 11/23/99 srl More performance enhancements. Inlining of 43 * critical accessors. 44 * 05/15/00 helena Added version information API. 45 * 01/29/01 synwee Modified into a C++ wrapper which calls C apis 46 * (ucoll.h). 
47 */ 48 49 #ifndef COLL_H 50 #define COLL_H 51 52 #include "unicode/utypes.h" 53 54 #if !UCONFIG_NO_COLLATION 55 56 #include "unicode/uobject.h" 57 #include "unicode/ucol.h" 58 #include "unicode/normlzr.h" 59 #include "unicode/locid.h" 60 #include "unicode/uniset.h" 61 #include "unicode/umisc.h" 62 #include "unicode/uiter.h" 63 #include "unicode/stringpiece.h" 64 65 U_NAMESPACE_BEGIN 66 67 class StringEnumeration; 68 69 #if !UCONFIG_NO_SERVICE 70 /** 71 * @stable ICU 2.6 72 */ 73 class CollatorFactory; 74 #endif 75 76 /** 77 * @stable ICU 2.0 78 */ 79 class CollationKey; 80 81 /** 82 * The <code>Collator</code> class performs locale-sensitive string 83 * comparison.<br> 84 * You use this class to build searching and sorting routines for natural 85 * language text.<br> 86 * <em>Important: </em>The ICU collation service has been reimplemented 87 * in order to achieve better performance and UCA compliance. 88 * For details, see the 89 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> 90 * collation design document</a>. 91 * <p> 92 * <code>Collator</code> is an abstract base class. Subclasses implement 93 * specific collation strategies. One subclass, 94 * <code>RuleBasedCollator</code>, is currently provided and is applicable 95 * to a wide set of languages. Other subclasses may be created to handle more 96 * specialized needs. 97 * <p> 98 * Like other locale-sensitive classes, you can use the static factory method, 99 * <code>createInstance</code>, to obtain the appropriate 100 * <code>Collator</code> object for a given locale. You will only need to 101 * look at the subclasses of <code>Collator</code> if you need to 102 * understand the details of a particular collation strategy or if you need to 103 * modify that strategy. 104 * <p> 105 * The following example shows how to compare two strings using the 106 * <code>Collator</code> for the default locale. 
107 * \htmlonly<blockquote>\endhtmlonly 108 * <pre> 109 * \code 110 * // Compare two strings in the default locale 111 * UErrorCode success = U_ZERO_ERROR; 112 * Collator* myCollator = Collator::createInstance(success); 113 * if (myCollator->compare("abc", "ABC") < 0) 114 * cout << "abc is less than ABC" << endl; 115 * else 116 * cout << "abc is greater than or equal to ABC" << endl; 117 * \endcode 118 * </pre> 119 * \htmlonly</blockquote>\endhtmlonly 120 * <p> 121 * You can set a <code>Collator</code>'s <em>strength</em> property to 122 * determine the level of difference considered significant in comparisons. 123 * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, 124 * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>. 125 * The exact assignment of strengths to language features is locale-dependent. 126 * For example, in Czech, "e" and "f" are considered primary differences, 127 * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary 128 * differences and "e" and "e" are identical. The following shows how both case 129 * and accents could be ignored for US English. 130 * \htmlonly<blockquote>\endhtmlonly 131 * <pre> 132 * \code 133 * //Get the Collator for US English and set its strength to PRIMARY 134 * UErrorCode success = U_ZERO_ERROR; 135 * Collator* usCollator = Collator::createInstance(Locale::US, success); 136 * usCollator->setStrength(Collator::PRIMARY); 137 * if (usCollator->compare("abc", "ABC") == 0) 138 * cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl; 139 * \endcode 140 * </pre> 141 * \htmlonly</blockquote>\endhtmlonly 142 * <p> 143 * For comparing strings exactly once, the <code>compare</code> method 144 * provides the best performance. When sorting a list of strings however, it 145 * is generally necessary to compare each string multiple times. In this case, 146 * sort keys provide better performance. 
The <code>getSortKey</code> methods 147 * convert a string to a series of bytes that can be compared bitwise against 148 * other sort keys using <code>strcmp()</code>. Sort keys are written as 149 * zero-terminated byte strings. They consist of several substrings, one for 150 * each collation strength level, that are delimited by 0x01 bytes. 151 * If the string code points are appended for UCOL_IDENTICAL, then they are 152 * processed for correct code point order comparison and may contain 0x01 153 * bytes but not zero bytes. 154 * </p> 155 * <p> 156 * An older set of APIs returns a <code>CollationKey</code> object that wraps 157 * the sort key bytes instead of returning the bytes themselves. 158 * Its use is deprecated, but it is still available for compatibility with 159 * Java. 160 * </p> 161 * <p> 162 * <strong>Note:</strong> <code>Collator</code>s with different Locale, 163 * and CollationStrength settings will return different sort 164 * orders for the same set of strings. Locales have specific collation rules, 165 * and the way in which secondary and tertiary differences are taken into 166 * account, for example, will result in a different sorting order for same 167 * strings. 168 * </p> 169 * @see RuleBasedCollator 170 * @see CollationKey 171 * @see CollationElementIterator 172 * @see Locale 173 * @see Normalizer 174 * @version 2.0 11/15/01 175 */ 176 177 class U_I18N_API Collator : public UObject { 178 public: 179 180 // Collator public enums ----------------------------------------------- 181 182 /** 183 * Base letter represents a primary difference. Set comparison level to 184 * PRIMARY to ignore secondary and tertiary differences.<br> 185 * Use this to set the strength of a Collator object.<br> 186 * Example of primary difference, "abc" < "abd" 187 * 188 * Diacritical differences on the same base letter represent a secondary 189 * difference. Set comparison level to SECONDARY to ignore tertiary 190 * differences. 
Use this to set the strength of a Collator object.<br> 191 * Example of secondary difference, "ä" >> "a". 192 * 193 * Uppercase and lowercase versions of the same character represents a 194 * tertiary difference. Set comparison level to TERTIARY to include all 195 * comparison differences. Use this to set the strength of a Collator 196 * object.<br> 197 * Example of tertiary difference, "abc" <<< "ABC". 198 * 199 * Two characters are considered "identical" when they have the same unicode 200 * spellings.<br> 201 * For example, "ä" == "ä". 202 * 203 * UCollationStrength is also used to determine the strength of sort keys 204 * generated from Collator objects. 205 * @stable ICU 2.0 206 */ 207 enum ECollationStrength 208 { 209 PRIMARY = 0, 210 SECONDARY = 1, 211 TERTIARY = 2, 212 QUATERNARY = 3, 213 IDENTICAL = 15 214 }; 215 216 /** 217 * LESS is returned if source string is compared to be less than target 218 * string in the compare() method. 219 * EQUAL is returned if source string is compared to be equal to target 220 * string in the compare() method. 221 * GREATER is returned if source string is compared to be greater than 222 * target string in the compare() method. 223 * @see Collator#compare 224 * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h 225 */ 226 enum EComparisonResult 227 { 228 LESS = -1, 229 EQUAL = 0, 230 GREATER = 1 231 }; 232 233 // Collator public destructor ----------------------------------------- 234 235 /** 236 * Destructor 237 * @stable ICU 2.0 238 */ 239 virtual ~Collator(); 240 241 // Collator public methods -------------------------------------------- 242 243 /** 244 * Returns true if "other" is the same as "this" 245 * @param other Collator object to be compared 246 * @return true if other is the same as this. 247 * @stable ICU 2.0 248 */ 249 virtual UBool operator==(const Collator& other) const; 250 251 /** 252 * Returns true if "other" is not the same as "this". 
253 * @param other Collator object to be compared 254 * @return true if other is not the same as this. 255 * @stable ICU 2.0 256 */ 257 virtual UBool operator!=(const Collator& other) const; 258 259 /** 260 * Makes a shallow copy of the current object. 261 * @return a copy of this object 262 * @stable ICU 2.0 263 */ 264 virtual Collator* clone(void) const = 0; 265 266 /** 267 * Creates the Collator object for the current default locale. 268 * The default locale is determined by Locale::getDefault. 269 * The UErrorCode& err parameter is used to return status information to the user. 270 * To check whether the construction succeeded or not, you should check the 271 * value of U_SUCCESS(err). If you wish more detailed information, you can 272 * check for informational error results which still indicate success. 273 * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For 274 * example, 'de_CH' was requested, but nothing was found there, so 'de' was 275 * used. U_USING_DEFAULT_ERROR indicates that the default locale data was 276 * used; neither the requested locale nor any of its fall back locales 277 * could be found. 278 * The caller owns the returned object and is responsible for deleting it. 279 * 280 * @param err the error code status. 281 * @return the collation object of the default locale.(for example, en_US) 282 * @see Locale#getDefault 283 * @stable ICU 2.0 284 */ 285 static Collator* U_EXPORT2 createInstance(UErrorCode& err); 286 287 /** 288 * Gets the table-based collation object for the desired locale. The 289 * resource of the desired locale will be loaded by ResourceLoader. 290 * Locale::ENGLISH is the base collation table and all other languages are 291 * built on top of it with additional language-specific modifications. 292 * The UErrorCode& err parameter is used to return status information to the user. 293 * To check whether the construction succeeded or not, you should check 294 * the value of U_SUCCESS(err). 
If you wish more detailed information, you 295 * can check for informational error results which still indicate success. 296 * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For 297 * example, 'de_CH' was requested, but nothing was found there, so 'de' was 298 * used. U_USING_DEFAULT_ERROR indicates that the default locale data was 299 * used; neither the requested locale nor any of its fall back locales 300 * could be found. 301 * The caller owns the returned object and is responsible for deleting it. 302 * @param loc The locale ID for which to open a collator. 303 * @param err the error code status. 304 * @return the created table-based collation object based on the desired 305 * locale. 306 * @see Locale 307 * @see ResourceLoader 308 * @stable ICU 2.2 309 */ 310 static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err); 311 312 #ifdef U_USE_COLLATION_OBSOLETE_2_6 313 /** 314 * Create a Collator with a specific version. 315 * This is the same as createInstance(loc, err) except that getVersion() of 316 * the returned object is guaranteed to be the same as the version 317 * parameter. 318 * This is designed to be used to open the same collator for a given 319 * locale even when ICU is updated. 320 * The same locale and version guarantees the same sort keys and 321 * comparison results. 322 * <p> 323 * Note: this API will be removed in a future release. Use 324 * <tt>createInstance(const Locale&, UErrorCode&) instead.</tt></p> 325 * 326 * @param loc The locale ID for which to open a collator. 327 * @param version The requested collator version. 328 * @param err A reference to a UErrorCode, 329 * must not indicate a failure before calling this function. 330 * @return A pointer to a Collator, or 0 if an error occurred 331 * or a collator with the requested version is not available. 
332 * 333 * @see getVersion 334 * @obsolete ICU 2.6 335 */ 336 static Collator *createInstance(const Locale &loc, UVersionInfo version, UErrorCode &err); 337 #endif 338 339 /** 340 * The comparison function compares the character data stored in two 341 * different strings. Returns information about whether a string is less 342 * than, greater than or equal to another string. 343 * @param source the source string to be compared with. 344 * @param target the string that is to be compared with the source string. 345 * @return Returns a byte value. GREATER if source is greater 346 * than target; EQUAL if source is equal to target; LESS if source is less 347 * than target 348 * @deprecated ICU 2.6 use the overload with UErrorCode & 349 */ 350 virtual EComparisonResult compare(const UnicodeString& source, 351 const UnicodeString& target) const; 352 353 /** 354 * The comparison function compares the character data stored in two 355 * different strings. Returns information about whether a string is less 356 * than, greater than or equal to another string. 357 * @param source the source string to be compared with. 358 * @param target the string that is to be compared with the source string. 359 * @param status possible error code 360 * @return Returns an enum value. UCOL_GREATER if source is greater 361 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 362 * than target 363 * @stable ICU 2.6 364 */ 365 virtual UCollationResult compare(const UnicodeString& source, 366 const UnicodeString& target, 367 UErrorCode &status) const = 0; 368 369 /** 370 * Does the same thing as compare but limits the comparison to a specified 371 * length 372 * @param source the source string to be compared with. 373 * @param target the string that is to be compared with the source string. 374 * @param length the length the comparison is limited to 375 * @return Returns a byte value. 
GREATER if source (up to the specified 376 * length) is greater than target; EQUAL if source (up to specified 377 * length) is equal to target; LESS if source (up to the specified 378 * length) is less than target. 379 * @deprecated ICU 2.6 use the overload with UErrorCode & 380 */ 381 virtual EComparisonResult compare(const UnicodeString& source, 382 const UnicodeString& target, 383 int32_t length) const; 384 385 /** 386 * Does the same thing as compare but limits the comparison to a specified 387 * length 388 * @param source the source string to be compared with. 389 * @param target the string that is to be compared with the source string. 390 * @param length the length the comparison is limited to 391 * @param status possible error code 392 * @return Returns an enum value. UCOL_GREATER if source (up to the specified 393 * length) is greater than target; UCOL_EQUAL if source (up to specified 394 * length) is equal to target; UCOL_LESS if source (up to the specified 395 * length) is less than target. 396 * @stable ICU 2.6 397 */ 398 virtual UCollationResult compare(const UnicodeString& source, 399 const UnicodeString& target, 400 int32_t length, 401 UErrorCode &status) const = 0; 402 403 /** 404 * The comparison function compares the character data stored in two 405 * different string arrays. Returns information about whether a string array 406 * is less than, greater than or equal to another string array. 407 * @param source the source string array to be compared with. 408 * @param sourceLength the length of the source string array. If this value 409 * is equal to -1, the string array is null-terminated. 410 * @param target the string that is to be compared with the source string. 411 * @param targetLength the length of the target string array. If this value 412 * is equal to -1, the string array is null-terminated. 413 * @return Returns a byte value. 
GREATER if source is greater than target; 414 * EQUAL if source is equal to target; LESS if source is less than 415 * target 416 * @deprecated ICU 2.6 use the overload with UErrorCode & 417 */ 418 virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, 419 const UChar* target, int32_t targetLength) 420 const; 421 422 /** 423 * The comparison function compares the character data stored in two 424 * different string arrays. Returns information about whether a string array 425 * is less than, greater than or equal to another string array. 426 * @param source the source string array to be compared with. 427 * @param sourceLength the length of the source string array. If this value 428 * is equal to -1, the string array is null-terminated. 429 * @param target the string that is to be compared with the source string. 430 * @param targetLength the length of the target string array. If this value 431 * is equal to -1, the string array is null-terminated. 432 * @param status possible error code 433 * @return Returns an enum value. UCOL_GREATER if source is greater 434 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 435 * than target 436 * @stable ICU 2.6 437 */ 438 virtual UCollationResult compare(const UChar* source, int32_t sourceLength, 439 const UChar* target, int32_t targetLength, 440 UErrorCode &status) const = 0; 441 442 /** 443 * Compares two strings using the Collator. 444 * Returns whether the first one compares less than/equal to/greater than 445 * the second one. 446 * This version takes UCharIterator input. 447 * @param sIter the first ("source") string iterator 448 * @param tIter the second ("target") string iterator 449 * @param status ICU status 450 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER 451 * @stable ICU 4.2 452 */ 453 virtual UCollationResult compare(UCharIterator &sIter, 454 UCharIterator &tIter, 455 UErrorCode &status) const; 456 457 /** 458 * Compares two UTF-8 strings using the Collator. 
459 * Returns whether the first one compares less than/equal to/greater than 460 * the second one. 461 * This version takes UTF-8 input. 462 * Note that a StringPiece can be implicitly constructed 463 * from a std::string or a NUL-terminated const char * string. 464 * @param source the first UTF-8 string 465 * @param target the second UTF-8 string 466 * @param status ICU status 467 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER 468 * @stable ICU 4.2 469 */ 470 virtual UCollationResult compareUTF8(const StringPiece &source, 471 const StringPiece &target, 472 UErrorCode &status) const; 473 474 /** 475 * Transforms the string into a series of characters that can be compared 476 * with CollationKey::compareTo. It is not possible to restore the original 477 * string from the chars in the sort key. The generated sort key handles 478 * only a limited number of ignorable characters. 479 * <p>Use CollationKey::equals or CollationKey::compare to compare the 480 * generated sort keys. 481 * If the source string is null, a null collation key will be returned. 482 * @param source the source string to be transformed into a sort key. 483 * @param key the collation key to be filled in 484 * @param status the error code status. 485 * @return the collation key of the string based on the collation rules. 486 * @see CollationKey#compare 487 * @deprecated ICU 2.8 Use getSortKey(...) instead 488 */ 489 virtual CollationKey& getCollationKey(const UnicodeString& source, 490 CollationKey& key, 491 UErrorCode& status) const = 0; 492 493 /** 494 * Transforms the string into a series of characters that can be compared 495 * with CollationKey::compareTo. It is not possible to restore the original 496 * string from the chars in the sort key. The generated sort key handles 497 * only a limited number of ignorable characters. 498 * <p>Use CollationKey::equals or CollationKey::compare to compare the 499 * generated sort keys. 
500 * <p>If the source string is null, a null collation key will be returned. 501 * @param source the source string to be transformed into a sort key. 502 * @param sourceLength length of the source string 503 * @param key the collation key to be filled in 504 * @param status the error code status. 505 * @return the collation key of the string based on the collation rules. 506 * @see CollationKey#compare 507 * @deprecated ICU 2.8 Use getSortKey(...) instead 508 */ 509 virtual CollationKey& getCollationKey(const UChar*source, 510 int32_t sourceLength, 511 CollationKey& key, 512 UErrorCode& status) const = 0; 513 /** 514 * Generates the hash code for the collation object * @return the hash code of this collation object 515 * @stable ICU 2.0 516 */ 517 virtual int32_t hashCode(void) const = 0; 518 519 /** 520 * Gets the locale of the Collator 521 * 522 * @param type can be either requested, valid or actual locale. For more 523 * information see the definition of ULocDataLocaleType in 524 * uloc.h 525 * @param status the error code status. 526 * @return locale where the collation data lives. If the collator 527 * was instantiated from rules, locale is empty. 528 * @deprecated ICU 2.8 This API is under consideration for revision 529 * in ICU 3.0. 530 */ 531 virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0; 532 533 /** 534 * Convenience method for comparing two strings based on the collation rules. 535 * @param source the source string to be compared with. 536 * @param target the target string to be compared with. 537 * @return true if the first string is greater than the second one, 538 * according to the collation rules. false, otherwise. 539 * @see Collator#compare 540 * @stable ICU 2.0 541 */ 542 UBool greater(const UnicodeString& source, const UnicodeString& target) 543 const; 544 545 /** 546 * Convenience method for comparing two strings based on the collation rules. 547 * @param source the source string to be compared with. 
548 * @param target the target string to be compared with. 549 * @return true if the first string is greater than or equal to the second 550 * one, according to the collation rules. false, otherwise. 551 * @see Collator#compare 552 * @stable ICU 2.0 553 */ 554 UBool greaterOrEqual(const UnicodeString& source, 555 const UnicodeString& target) const; 556 557 /** 558 * Convenience method for comparing two strings based on the collation rules. 559 * @param source the source string to be compared with. 560 * @param target the target string to be compared with. 561 * @return true if the strings are equal according to the collation rules. 562 * false, otherwise. 563 * @see Collator#compare 564 * @stable ICU 2.0 565 */ 566 UBool equals(const UnicodeString& source, const UnicodeString& target) const; 567 568 /** 569 * Determines the minimum strength that will be used in comparison or 570 * transformation. 571 * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored 572 * <p>E.g. with strength == PRIMARY, the secondary and tertiary differences 573 * are ignored. 574 * @return the current comparison level. 575 * @see Collator#setStrength 576 * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead 577 */ 578 virtual ECollationStrength getStrength(void) const = 0; 579 580 /** 581 * Sets the minimum strength to be used in comparison or transformation. 582 * <p>Example of use: 583 * <pre> 584 * \code 585 * UErrorCode status = U_ZERO_ERROR; 586 * Collator*myCollation = Collator::createInstance(Locale::US, status); 587 * if (U_FAILURE(status)) return; 588 * myCollation->setStrength(Collator::PRIMARY); 589 * // result will be "abc" == "ABC" 590 * // tertiary differences will be ignored 591 * Collator::EComparisonResult result = myCollation->compare("abc", "ABC"); 592 * \endcode 593 * </pre> 594 * @see Collator#getStrength 595 * @param newStrength the new comparison level. 596 * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) 
instead 597 */ 598 virtual void setStrength(ECollationStrength newStrength) = 0; 599 600 /** 601 * Get the current reordering of scripts (if one has been set). 602 * @param dest The array to fill with the script ordering. 603 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting). 604 * @param status A reference to an error code value, which must not indicate a failure before the function call. 605 * @return The length of the array of the script ordering. 606 * @see ucol_getReorderCodes 607 * @internal 608 */ 609 virtual int32_t getReorderCodes(int32_t *dest, 610 int32_t destCapacity, 611 UErrorCode& status) const; 612 613 /** 614 * Set the ordering of scripts for this collator. 615 * @param reorderCodes An array of reorder codes in the new order. 616 * @param reorderCodesLength The length of reorderCodes. * @param status the error code status. 617 * @see ucol_setReorderCodes 618 * @internal 619 */ 620 virtual void setReorderCodes(const int32_t* reorderCodes, 621 int32_t reorderCodesLength, 622 UErrorCode& status) ; 623 624 /** 625 * Get name of the object for the desired Locale, in the desired language 626 * @param objectLocale must be from getAvailableLocales 627 * @param displayLocale specifies the desired locale for output 628 * @param name the fill-in parameter of the return value 629 * @return display-able name of the object for the object locale in the 630 * desired language 631 * @stable ICU 2.0 632 */ 633 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, 634 const Locale& displayLocale, 635 UnicodeString& name); 636 637 /** 638 * Get name of the object for the desired Locale, in the language of the 639 * default locale. 
640 * @param objectLocale must be from getAvailableLocales 641 * @param name the fill-in parameter of the return value 642 * @return name of the object for the desired locale in the default language 643 * @stable ICU 2.0 644 */ 645 static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, 646 UnicodeString& name); 647 648 /** 649 * Get the set of Locales for which Collations are installed. 650 * 651 * <p>Note this does not include locales supported by registered collators. 652 * If collators might have been registered, use the overload of getAvailableLocales 653 * that returns a StringEnumeration.</p> 654 * 655 * @param count the output parameter of number of elements in the locale list 656 * @return the list of available locales for which collations are installed 657 * @stable ICU 2.0 658 */ 659 static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); 660 661 /** 662 * Return a StringEnumeration over the locales available at the time of the call, 663 * including registered locales. If a severe error occurs (such as out of memory 664 * condition) this will return null. If there is no locale data, an empty enumeration 665 * will be returned. 666 * @return a StringEnumeration over the locales available at the time of the call 667 * @stable ICU 2.6 668 */ 669 static StringEnumeration* U_EXPORT2 getAvailableLocales(void); 670 671 /** 672 * Create a string enumerator of all possible keywords that are relevant to 673 * collation. At this point, the only recognized keyword for this 674 * service is "collation". 675 * @param status input-output error code 676 * @return a string enumeration over the collation keywords. The caller is 677 * responsible for deleting the result. 678 * @stable ICU 3.0 679 */ 680 static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status); 681 682 /** 683 * Given a keyword, create a string enumeration of all values 684 * for that keyword that are currently in use. 
685 * @param keyword a particular keyword as enumerated by 686 * ucol_getKeywords. If any other keyword is passed in, status is set 687 * to U_ILLEGAL_ARGUMENT_ERROR. 688 * @param status input-output error code 689 * @return a string enumeration over collation keyword values, or NULL 690 * upon error. The caller is responsible for deleting the result. 691 * @stable ICU 3.0 692 */ 693 static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status); 694 695 /** 696 * Given a key and a locale, returns an enumeration of string values in a preferred 697 * order that would make a difference. These are all and only those values where 698 * the open (creation) of the service with the locale formed from the input locale 699 * plus input keyword and that value has different behavior than creation with the 700 * input locale alone. 701 * @param keyword one of the keys supported by this service. For now, only 702 * "collation" is supported. 703 * @param locale the locale 704 * @param commonlyUsed if set to true it will return only commonly used values 705 * with the given locale in preferred order. Otherwise, 706 * it will return all the available values for the locale. 707 * @param status ICU status 708 * @return a string enumeration over keyword values for the given key and the locale. 709 * @stable ICU 4.2 710 */ 711 static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale, 712 UBool commonlyUsed, UErrorCode& status); 713 714 /** 715 * Return the functionally equivalent locale for the given 716 * requested locale, with respect to the given keyword, for the 717 * collation service. If two locales return the same result, then 718 * collators instantiated for these locales will behave 719 * equivalently. The converse is not always true; two collators 720 * may in fact be equivalent, but return different results, due to 721 * internal details.
The return result has no other meaning than 722 * that stated above, and implies nothing as to the relationship 723 * between the two locales. This is intended for use by 724 * applications that wish to cache collators, or otherwise reuse 725 * collators when possible. The functional equivalent may change 726 * over time. For more information, please see the <a 727 * href="http://icu-project.org/userguide/locale.html#services"> 728 * Locales and Services</a> section of the ICU User Guide. 729 * @param keyword a particular keyword as enumerated by 730 * ucol_getKeywords. 731 * @param locale the requested locale 732 * @param isAvailable reference to a fill-in parameter that 733 * indicates whether the requested locale was 'available' to the 734 * collation service. A locale is defined as 'available' if it 735 * physically exists within the collation locale data. 736 * @param status reference to input-output error code 737 * @return the functionally equivalent collation locale, or the root 738 * locale upon error. 739 * @stable ICU 3.0 740 */ 741 static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale, 742 UBool& isAvailable, UErrorCode& status); 743 744 #if !UCONFIG_NO_SERVICE 745 /** 746 * Register a new Collator. The collator will be adopted. 747 * @param toAdopt the Collator instance to be adopted 748 * @param locale the locale with which the collator will be associated 749 * @param status the in/out status code, no special meanings are assigned 750 * @return a registry key that can be used to unregister this collator 751 * @stable ICU 2.6 752 */ 753 static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status); 754 755 /** 756 * Register a new CollatorFactory. The factory will be adopted.
757 * @param toAdopt the CollatorFactory instance to be adopted 758 * @param status the in/out status code, no special meanings are assigned 759 * @return a registry key that can be used to unregister this factory 760 * @stable ICU 2.6 761 */ 762 static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status); 763 764 /** 765 * Unregister a previously-registered Collator or CollatorFactory 766 * using the key returned from the register call. Key becomes 767 * invalid after a successful call and should not be used again. 768 * The object corresponding to the key will be deleted. 769 * @param key the registry key returned by a previous call to registerInstance or registerFactory 770 * @param status the in/out status code, no special meanings are assigned 771 * @return TRUE if the collator or factory for the key was successfully unregistered 772 * @stable ICU 2.6 773 */ 774 static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); 775 #endif /* UCONFIG_NO_SERVICE */ 776 777 /** 778 * Gets the version information for a Collator. 779 * @param info the version # information, the result will be filled in 780 * @stable ICU 2.0 781 */ 782 virtual void getVersion(UVersionInfo info) const = 0; 783 784 /** 785 * Returns a unique class ID POLYMORPHICALLY. Pure virtual method. 786 * This method is to implement a simple version of RTTI, since not all C++ 787 * compilers support genuine RTTI. Polymorphic operator==() and clone() 788 * methods call this method. 789 * @return The class ID for this object. All objects of a given class have 790 * the same class ID. Objects of other classes have different class 791 * IDs.
792 * @stable ICU 2.0 793 */ 794 virtual UClassID getDynamicClassID(void) const = 0; 795 796 /** 797 * Universal attribute setter 798 * @param attr attribute type 799 * @param value attribute value 800 * @param status to indicate whether the operation went on smoothly or 801 * there were errors 802 * @stable ICU 2.2 803 */ 804 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, 805 UErrorCode &status) = 0; 806 807 /** 808 * Universal attribute getter 809 * @param attr attribute type 810 * @param status to indicate whether the operation went on smoothly or 811 * there were errors 812 * @return attribute value 813 * @stable ICU 2.2 814 */ 815 virtual UColAttributeValue getAttribute(UColAttribute attr, 816 UErrorCode &status) = 0; 817 818 /** 819 * Sets the variable top to a collation element value of a string supplied. 820 * @param varTop one or more (if contraction) UChars to which the variable top should be set 821 * @param len length of variable top string. If -1 it is considered to be zero terminated. 822 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> 823 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 824 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 825 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined 826 * @stable ICU 2.0 827 */ 828 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0; 829 830 /** 831 * Sets the variable top to a collation element value of a string supplied. 832 * @param varTop a UnicodeString of size 1 or more (if contraction) of UChars to which the variable top should be set 833 * @param status error code. If error code is set, the return value is undefined.
Errors set by this function are: <br> 834 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 835 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 836 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined 837 * @stable ICU 2.0 838 */ 839 virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status) = 0; 840 841 /** 842 * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. 843 * Lower 16 bits are ignored. 844 * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop 845 * @param status error code (not changed by function) 846 * @stable ICU 2.0 847 */ 848 virtual void setVariableTop(const uint32_t varTop, UErrorCode &status) = 0; 849 850 /** 851 * Gets the variable top value of a Collator. 852 * Lower 16 bits are undefined and should be ignored. 853 * @param status error code (not changed by function). If error code is set, the return value is undefined. 854 * @stable ICU 2.0 855 */ 856 virtual uint32_t getVariableTop(UErrorCode &status) const = 0; 857 858 /** 859 * Get a UnicodeSet that contains all the characters and sequences 860 * tailored in this collator. 861 * @param status error code of the operation 862 * @return a pointer to a UnicodeSet object containing all the 863 * code points and sequences that may sort differently than 864 * in the UCA. The object must be disposed of by using delete 865 * @stable ICU 2.4 866 */ 867 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; 868 869 870 /** 871 * Thread safe cloning operation 872 * @return pointer to the new clone, user should remove it. 873 * @stable ICU 2.2 874 */ 875 virtual Collator* safeClone(void) = 0; 876 877 /** 878 * Get the sort key as an array of bytes from a UnicodeString.
879 * Sort key byte arrays are zero-terminated and can be compared using 880 * strcmp(). 881 * @param source string to be processed. 882 * @param result buffer to store result in. If NULL, number of bytes needed 883 * will be returned. 884 * @param resultLength length of the result buffer. If it is not large enough, the 885 * buffer will be filled to capacity. 886 * @return Number of bytes needed for storing the sort key 887 * @stable ICU 2.2 888 */ 889 virtual int32_t getSortKey(const UnicodeString& source, 890 uint8_t* result, 891 int32_t resultLength) const = 0; 892 893 /** 894 * Get the sort key as an array of bytes from a UChar buffer. 895 * Sort key byte arrays are zero-terminated and can be compared using 896 * strcmp(). 897 * @param source string to be processed. 898 * @param sourceLength length of string to be processed. 899 * If -1, the string is 0 terminated and length will be decided by the 900 * function. 901 * @param result buffer to store result in. If NULL, number of bytes needed 902 * will be returned. 903 * @param resultLength length of the result buffer. If it is not large enough, the 904 * buffer will be filled to capacity. 905 * @return Number of bytes needed for storing the sort key 906 * @stable ICU 2.2 907 */ 908 virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, 909 uint8_t*result, int32_t resultLength) const = 0; 910 911 /** 912 * Produce a bound for a given sortkey and a number of levels. 913 * Return value is always the number of bytes needed, regardless of 914 * whether the result buffer was big enough or even valid.<br> 915 * Resulting bounds can be used to produce a range of strings that are 916 * between upper and lower bounds. For example, if bounds are produced 917 * for a sortkey of string "smith", strings between upper and lower 918 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 919 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 920 * is produced, strings matched would be as above.
However, if a bound 921 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 922 * also match "Smithsonian" and similar.<br> 923 * For more on usage, see example in cintltst/capitst.c in procedure 924 * TestBounds. 925 * Sort keys may be compared using <TT>strcmp</TT>. 926 * @param source The source sortkey. 927 * @param sourceLength The length of source, or -1 if null-terminated. 928 * (If an unmodified sortkey is passed, it is always null 929 * terminated). 930 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 931 * produces a lower inclusive bound, UCOL_BOUND_UPPER, which 932 * produces an upper bound that matches strings of the same length 933 * or UCOL_BOUND_UPPER_LONG that matches strings that have the 934 * same starting substring as the source string. 935 * @param noOfLevels Number of levels required in the resulting bound (for most 936 * uses, the recommended value is 1). See the User Guide for 937 * explanation on number of levels a sortkey can have. 938 * @param result A pointer to a buffer to receive the resulting sortkey. 939 * @param resultLength The maximum size of result. 940 * @param status Used for returning error code if something went wrong. If the 941 * number of levels requested is higher than the number of levels 942 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 943 * issued. 944 * @return The size needed to fully store the bound. 945 * @see ucol_keyHashCode 946 * @stable ICU 2.1 947 */ 948 static int32_t U_EXPORT2 getBound(const uint8_t *source, 949 int32_t sourceLength, 950 UColBoundMode boundType, 951 uint32_t noOfLevels, 952 uint8_t *result, 953 int32_t resultLength, 954 UErrorCode &status); 955 956 957 protected: 958 959 // Collator protected constructors ------------------------------------- 960 961 /** 962 * Default constructor. 963 * Constructor is different from the old default Collator constructor.
964 * The task for determing the default collation strength and normalization 965 * mode is left to the child class. 966 * @stable ICU 2.0 967 */ 968 Collator(); 969 970 /** 971 * Constructor. 972 * Empty constructor, does not handle the arguments. 973 * This constructor is done for backward compatibility with 1.7 and 1.8. 974 * The task for handling the argument collation strength and normalization 975 * mode is left to the child class. 976 * @param collationStrength collation strength 977 * @param decompositionMode normalization mode 978 * @deprecated ICU 2.4. Subclasses should use the default constructor 979 * instead and handle the strength and normalization mode themselves. 980 */ 981 Collator(UCollationStrength collationStrength, 982 UNormalizationMode decompositionMode); 983 984 /** 985 * Copy constructor. 986 * @param other Collator object to be copied from 987 * @stable ICU 2.0 988 */ 989 Collator(const Collator& other); 990 991 // Collator protected methods ----------------------------------------- 992 993 994 /** 995 * Used internally by registration to define the requested, valid, and actual locales. 996 * @param requestedLocale the requested locale 997 * @param validLocale the valid locale 998 * @param actualLocale the actual locale 999 * @internal 1000 */ 1001 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale); 1002 1003 public: 1004 #if !UCONFIG_NO_SERVICE 1005 /** 1006 * used only by ucol_open, not for public use 1007 * @internal 1008 */ 1009 static UCollator* createUCollator(const char* loc, UErrorCode* status); 1010 #endif 1011 private: 1012 /** 1013 * Assignment operator. Private for now.
1014 * @internal 1015 */ 1016 Collator& operator=(const Collator& other); 1017 1018 friend class CFactory; 1019 friend class SimpleCFactory; 1020 friend class ICUCollatorFactory; 1021 friend class ICUCollatorService; 1022 static Collator* makeInstance(const Locale& desiredLocale, 1023 UErrorCode& status); 1024 1025 // Collator private data members --------------------------------------- 1026 1027 /* 1028 synwee : removed as attributes to be handled by child class 1029 UCollationStrength strength; 1030 Normalizer::EMode decmp; 1031 */ 1032 /* This is useless information */ 1033 /* static const UVersionInfo fVersion;*/ 1034 }; 1035 1036 #if !UCONFIG_NO_SERVICE 1037 /** 1038 * A factory, used with registerFactory, that creates multiple collators and provides 1039 * display names for them. A factory supports some number of locales-- these are the 1040 * locales for which it can create collators. The factory can be visible, in which 1041 * case the supported locales will be enumerated by getAvailableLocales, or invisible, 1042 * in which case they are not. Invisible locales are still supported, they are just not 1043 * listed by getAvailableLocales. 1044 * <p> 1045 * If standard locale display names are sufficient, Collator instances can 1046 * be registered using registerInstance instead.</p> 1047 * <p> 1048 * Note: if the collators are to be used from C APIs, they must be instances 1049 * of RuleBasedCollator.</p> 1050 * 1051 * @stable ICU 2.6 1052 */ 1053 class U_I18N_API CollatorFactory : public UObject { 1054 public: 1055 1056 /** 1057 * Destructor 1058 * @stable ICU 3.0 1059 */ 1060 virtual ~CollatorFactory(); 1061 1062 /** 1063 * Return true if this factory is visible. Default is true. 1064 * If not visible, the locales supported by this factory will not 1065 * be listed by getAvailableLocales. 1066 * @return true if the factory is visible.
1067 * @stable ICU 2.6 1068 */ 1069 virtual UBool visible(void) const; 1070 1071 /** 1072 * Return a collator for the provided locale. If the locale 1073 * is not supported, return NULL. 1074 * @param loc the locale identifying the collator to be created. 1075 * @return a new collator if the locale is supported, otherwise NULL. 1076 * @stable ICU 2.6 1077 */ 1078 virtual Collator* createCollator(const Locale& loc) = 0; 1079 1080 /** 1081 * Return the name of the collator for the objectLocale, localized for the displayLocale. 1082 * If objectLocale is not supported, or the factory is not visible, set the result string 1083 * to bogus. 1084 * @param objectLocale the locale identifying the collator 1085 * @param displayLocale the locale for which the display name of the collator should be localized 1086 * @param result an output parameter for the display name, set to bogus if not supported. 1087 * @return the display name 1088 * @stable ICU 2.6 1089 */ 1090 virtual UnicodeString& getDisplayName(const Locale& objectLocale, 1091 const Locale& displayLocale, 1092 UnicodeString& result); 1093 1094 /** 1095 * Return an array of all the locale names directly supported by this factory. 1096 * The number of names is returned in count. This array is owned by the factory. 1097 * Its contents must never change. 1098 * @param count output parameter for the number of locales supported by the factory 1099 * @param status the in/out error code 1100 * @return a pointer to an array of count UnicodeStrings. 1101 * @stable ICU 2.6 1102 */ 1103 virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0; 1104 }; 1105 #endif /* UCONFIG_NO_SERVICE */ 1106 1107 // Collator inline methods ----------------------------------------------- 1108 1109 U_NAMESPACE_END 1110 1111 #endif /* #if !UCONFIG_NO_COLLATION */ 1112 1113 #endif /* COLL_H */ 1114