1 /* 2 ****************************************************************************** 3 * Copyright (C) 1996-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ****************************************************************************** 6 */ 7 8 /** 9 * \file 10 * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator. 11 */ 12 13 /** 14 * File tblcoll.h 15 * 16 * Created by: Helena Shih 17 * 18 * Modification History: 19 * 20 * Date Name Description 21 * 2/5/97 aliu Added streamIn and streamOut methods. Added 22 * constructor which reads RuleBasedCollator object from 23 * a binary file. Added writeToFile method which streams 24 * RuleBasedCollator out to a binary file. The streamIn 25 * and streamOut methods use istream and ostream objects 26 * in binary mode. 27 * 2/12/97 aliu Modified to use TableCollationData sub-object to 28 * hold invariant data. 29 * 2/13/97 aliu Moved several methods into this class from Collation. 30 * Added a private RuleBasedCollator(Locale&) constructor, 31 * to be used by Collator::createDefault(). General 32 * clean up. 33 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and copy 34 * constructor and getDynamicClassID. 35 * 3/5/97 aliu Modified constructFromFile() to add parameter 36 * specifying whether or not binary loading is to be 37 * attempted. This is required for dynamic rule loading. 38 * 05/07/97 helena Added memory allocation error detection. 39 * 6/17/97 helena Added IDENTICAL strength for compare, changed getRules to 40 * use MergeCollation::getPattern. 41 * 6/20/97 helena Java class name change. 42 * 8/18/97 helena Added internal API documentation. 43 * 09/03/97 helena Added createCollationKeyValues(). 
44 * 02/10/98 damiba Added compare with "length" parameter 45 * 08/05/98 erm Synched with 1.2 version of RuleBasedCollator.java 46 * 04/23/99 stephen Removed EDecompositionMode, merged with 47 * Normalizer::EMode 48 * 06/14/99 stephen Removed kResourceBundleSuffix 49 * 11/02/99 helena Collator performance enhancements. Eliminates the 50 * UnicodeString construction and special case for NO_OP. 51 * 11/23/99 srl More performance enhancements. Updates to NormalizerIterator 52 * internal state management. 53 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIterator 54 * to implementation file. 55 * 01/29/01 synwee Modified into a C++ wrapper which calls C API 56 * (ucol.h) 57 */ 58 59 #ifndef TBLCOLL_H 60 #define TBLCOLL_H 61 62 #include "unicode/utypes.h" 63 64 65 #if !UCONFIG_NO_COLLATION 66 67 #include "unicode/coll.h" 68 #include "unicode/ucol.h" 69 #include "unicode/sortkey.h" 70 #include "unicode/normlzr.h" 71 72 U_NAMESPACE_BEGIN 73 74 /** 75 * @stable ICU 2.0 76 */ 77 class StringSearch; 78 /** 79 * @stable ICU 2.0 80 */ 81 class CollationElementIterator; 82 83 /** 84 * The RuleBasedCollator class provides the simple implementation of 85 * Collator, using data-driven tables. The user can create a customized 86 * table-based collation. 87 * <P> 88 * <em>Important: </em>The ICU collation service has been reimplemented 89 * in order to achieve better performance and UCA compliance. 90 * For details, see the 91 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> 92 * collation design document</a>. 93 * <p> 94 * RuleBasedCollator is a thin C++ wrapper over the C implementation. 95 * <p> 96 * For more information about the collation service see 97 * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. 98 * <p> 99 * Collation service provides correct sorting orders for most locales supported in ICU. 
100 * If specific data for a locale is not available, the orders eventually falls back 101 * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 102 * <p> 103 * Sort ordering may be customized by providing your own set of rules. For more on 104 * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html"> 105 * Collation customization</a> section of the users guide. 106 * <p> 107 * Note, RuleBasedCollator is not to be subclassed. 108 * @see Collator 109 * @version 2.0 11/15/2001 110 */ 111 class U_I18N_API RuleBasedCollator : public Collator 112 { 113 public: 114 115 // constructor ------------------------------------------------------------- 116 117 /** 118 * RuleBasedCollator constructor. This takes the table rules and builds a 119 * collation table out of them. Please see RuleBasedCollator class 120 * description for more details on the collation rule syntax. 121 * @param rules the collation rules to build the collation table from. 122 * @param status reporting a success or an error. 123 * @see Locale 124 * @stable ICU 2.0 125 */ 126 RuleBasedCollator(const UnicodeString& rules, UErrorCode& status); 127 128 /** 129 * RuleBasedCollator constructor. This takes the table rules and builds a 130 * collation table out of them. Please see RuleBasedCollator class 131 * description for more details on the collation rule syntax. 132 * @param rules the collation rules to build the collation table from. 133 * @param collationStrength default strength for comparison 134 * @param status reporting a success or an error. 135 * @see Locale 136 * @stable ICU 2.0 137 */ 138 RuleBasedCollator(const UnicodeString& rules, 139 ECollationStrength collationStrength, 140 UErrorCode& status); 141 142 /** 143 * RuleBasedCollator constructor. This takes the table rules and builds a 144 * collation table out of them. Please see RuleBasedCollator class 145 * description for more details on the collation rule syntax. 
146 * @param rules the collation rules to build the collation table from. 147 * @param decompositionMode the normalisation mode 148 * @param status reporting a success or an error. 149 * @see Locale 150 * @stable ICU 2.0 151 */ 152 RuleBasedCollator(const UnicodeString& rules, 153 UColAttributeValue decompositionMode, 154 UErrorCode& status); 155 156 /** 157 * RuleBasedCollator constructor. This takes the table rules and builds a 158 * collation table out of them. Please see RuleBasedCollator class 159 * description for more details on the collation rule syntax. 160 * @param rules the collation rules to build the collation table from. 161 * @param collationStrength default strength for comparison 162 * @param decompositionMode the normalisation mode 163 * @param status reporting a success or an error. 164 * @see Locale 165 * @stable ICU 2.0 166 */ 167 RuleBasedCollator(const UnicodeString& rules, 168 ECollationStrength collationStrength, 169 UColAttributeValue decompositionMode, 170 UErrorCode& status); 171 172 /** 173 * Copy constructor. 174 * @param other the RuleBasedCollator object to be copied 175 * @see Locale 176 * @stable ICU 2.0 177 */ 178 RuleBasedCollator(const RuleBasedCollator& other); 179 180 181 /** Opens a collator from a collator binary image created using 182 * cloneBinary. Binary image used in instantiation of the 183 * collator remains owned by the user and should stay around for 184 * the lifetime of the collator. The API also takes a base collator 185 * which usualy should be UCA. 186 * @param bin binary image owned by the user and required through the 187 * lifetime of the collator 188 * @param length size of the image. If negative, the API will try to 189 * figure out the length of the image 190 * @param base fallback collator, usually UCA. Base is required to be 191 * present through the lifetime of the collator. Currently 192 * it cannot be NULL. 
193 * @param status for catching errors 194 * @return newly created collator 195 * @see cloneBinary 196 * @stable ICU 3.4 197 */ 198 RuleBasedCollator(const uint8_t *bin, int32_t length, 199 const RuleBasedCollator *base, 200 UErrorCode &status); 201 // destructor -------------------------------------------------------------- 202 203 /** 204 * Destructor. 205 * @stable ICU 2.0 206 */ 207 virtual ~RuleBasedCollator(); 208 209 // public methods ---------------------------------------------------------- 210 211 /** 212 * Assignment operator. 213 * @param other other RuleBasedCollator object to compare with. 214 * @stable ICU 2.0 215 */ 216 RuleBasedCollator& operator=(const RuleBasedCollator& other); 217 218 /** 219 * Returns true if argument is the same as this object. 220 * @param other Collator object to be compared. 221 * @return true if arguments is the same as this object. 222 * @stable ICU 2.0 223 */ 224 virtual UBool operator==(const Collator& other) const; 225 226 /** 227 * Returns true if argument is not the same as this object. 228 * @param other Collator object to be compared 229 * @return returns true if argument is not the same as this object. 230 * @stable ICU 2.0 231 */ 232 virtual UBool operator!=(const Collator& other) const; 233 234 /** 235 * Makes a deep copy of the object. 236 * The caller owns the returned object. 237 * @return the cloned object. 238 * @stable ICU 2.0 239 */ 240 virtual Collator* clone(void) const; 241 242 /** 243 * Creates a collation element iterator for the source string. The caller of 244 * this method is responsible for the memory management of the return 245 * pointer. 246 * @param source the string over which the CollationElementIterator will 247 * iterate. 248 * @return the collation element iterator of the source string using this as 249 * the based Collator. 
250 * @stable ICU 2.2 251 */ 252 virtual CollationElementIterator* createCollationElementIterator( 253 const UnicodeString& source) const; 254 255 /** 256 * Creates a collation element iterator for the source. The caller of this 257 * method is responsible for the memory management of the returned pointer. 258 * @param source the CharacterIterator which produces the characters over 259 * which the CollationElementItgerator will iterate. 260 * @return the collation element iterator of the source using this as the 261 * based Collator. 262 * @stable ICU 2.2 263 */ 264 virtual CollationElementIterator* createCollationElementIterator( 265 const CharacterIterator& source) const; 266 267 /** 268 * Compares a range of character data stored in two different strings based 269 * on the collation rules. Returns information about whether a string is 270 * less than, greater than or equal to another string in a language. 271 * This can be overriden in a subclass. 272 * @param source the source string. 273 * @param target the target string to be compared with the source string. 274 * @return the comparison result. GREATER if the source string is greater 275 * than the target string, LESS if the source is less than the 276 * target. Otherwise, returns EQUAL. 277 * @deprecated ICU 2.6 Use overload with UErrorCode& 278 */ 279 virtual EComparisonResult compare(const UnicodeString& source, 280 const UnicodeString& target) const; 281 282 283 /** 284 * The comparison function compares the character data stored in two 285 * different strings. Returns information about whether a string is less 286 * than, greater than or equal to another string. 287 * @param source the source string to be compared with. 288 * @param target the string that is to be compared with the source string. 289 * @param status possible error code 290 * @return Returns an enum value. 
UCOL_GREATER if source is greater 291 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 292 * than target 293 * @stable ICU 2.6 294 **/ 295 virtual UCollationResult compare(const UnicodeString& source, 296 const UnicodeString& target, 297 UErrorCode &status) const; 298 299 /** 300 * Compares a range of character data stored in two different strings based 301 * on the collation rules up to the specified length. Returns information 302 * about whether a string is less than, greater than or equal to another 303 * string in a language. This can be overriden in a subclass. 304 * @param source the source string. 305 * @param target the target string to be compared with the source string. 306 * @param length compares up to the specified length 307 * @return the comparison result. GREATER if the source string is greater 308 * than the target string, LESS if the source is less than the 309 * target. Otherwise, returns EQUAL. 310 * @deprecated ICU 2.6 Use overload with UErrorCode& 311 */ 312 virtual EComparisonResult compare(const UnicodeString& source, 313 const UnicodeString& target, 314 int32_t length) const; 315 316 /** 317 * Does the same thing as compare but limits the comparison to a specified 318 * length 319 * @param source the source string to be compared with. 320 * @param target the string that is to be compared with the source string. 321 * @param length the length the comparison is limited to 322 * @param status possible error code 323 * @return Returns an enum value. UCOL_GREATER if source (up to the specified 324 * length) is greater than target; UCOL_EQUAL if source (up to specified 325 * length) is equal to target; UCOL_LESS if source (up to the specified 326 * length) is less than target. 
327 * @stable ICU 2.6 328 */ 329 virtual UCollationResult compare(const UnicodeString& source, 330 const UnicodeString& target, 331 int32_t length, 332 UErrorCode &status) const; 333 334 /** 335 * The comparison function compares the character data stored in two 336 * different string arrays. Returns information about whether a string array 337 * is less than, greater than or equal to another string array. 338 * <p>Example of use: 339 * <pre> 340 * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC" 341 * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc" 342 * . UErrorCode status = U_ZERO_ERROR; 343 * . Collator *myCollation = 344 * . Collator::createInstance(Locale::US, status); 345 * . if (U_FAILURE(status)) return; 346 * . myCollation->setStrength(Collator::PRIMARY); 347 * . // result would be Collator::EQUAL ("abc" == "ABC") 348 * . // (no primary difference between "abc" and "ABC") 349 * . Collator::EComparisonResult result = 350 * . myCollation->compare(abc, 3, ABC, 3); 351 * . myCollation->setStrength(Collator::TERTIARY); 352 * . // result would be Collator::LESS ("abc" <<< "ABC") 353 * . // (with tertiary difference between "abc" and "ABC") 354 * . result = myCollation->compare(abc, 3, ABC, 3); 355 * </pre> 356 * @param source the source string array to be compared with. 357 * @param sourceLength the length of the source string array. If this value 358 * is equal to -1, the string array is null-terminated. 359 * @param target the string that is to be compared with the source string. 360 * @param targetLength the length of the target string array. If this value 361 * is equal to -1, the string array is null-terminated. 362 * @return Returns a byte value. 
GREATER if source is greater than target; 363 * EQUAL if source is equal to target; LESS if source is less than 364 * target 365 * @deprecated ICU 2.6 Use overload with UErrorCode& 366 */ 367 virtual EComparisonResult compare(const UChar* source, int32_t sourceLength, 368 const UChar* target, int32_t targetLength) 369 const; 370 371 /** 372 * The comparison function compares the character data stored in two 373 * different string arrays. Returns information about whether a string array 374 * is less than, greater than or equal to another string array. 375 * @param source the source string array to be compared with. 376 * @param sourceLength the length of the source string array. If this value 377 * is equal to -1, the string array is null-terminated. 378 * @param target the string that is to be compared with the source string. 379 * @param targetLength the length of the target string array. If this value 380 * is equal to -1, the string array is null-terminated. 381 * @param status possible error code 382 * @return Returns an enum value. UCOL_GREATER if source is greater 383 * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less 384 * than target 385 * @stable ICU 2.6 386 */ 387 virtual UCollationResult compare(const UChar* source, int32_t sourceLength, 388 const UChar* target, int32_t targetLength, 389 UErrorCode &status) const; 390 391 /** 392 * Compares two strings using the Collator. 393 * Returns whether the first one compares less than/equal to/greater than 394 * the second one. 395 * This version takes UCharIterator input. 
396 * @param sIter the first ("source") string iterator 397 * @param tIter the second ("target") string iterator 398 * @param status ICU status 399 * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER 400 * @stable ICU 4.2 401 */ 402 virtual UCollationResult compare(UCharIterator &sIter, 403 UCharIterator &tIter, 404 UErrorCode &status) const; 405 406 /** 407 * Transforms a specified region of the string into a series of characters 408 * that can be compared with CollationKey.compare. Use a CollationKey when 409 * you need to do repeated comparisions on the same string. For a single 410 * comparison the compare method will be faster. 411 * @param source the source string. 412 * @param key the transformed key of the source string. 413 * @param status the error code status. 414 * @return the transformed key. 415 * @see CollationKey 416 * @deprecated ICU 2.8 Use getSortKey(...) instead 417 */ 418 virtual CollationKey& getCollationKey(const UnicodeString& source, 419 CollationKey& key, 420 UErrorCode& status) const; 421 422 /** 423 * Transforms a specified region of the string into a series of characters 424 * that can be compared with CollationKey.compare. Use a CollationKey when 425 * you need to do repeated comparisions on the same string. For a single 426 * comparison the compare method will be faster. 427 * @param source the source string. 428 * @param sourceLength the length of the source string. 429 * @param key the transformed key of the source string. 430 * @param status the error code status. 431 * @return the transformed key. 432 * @see CollationKey 433 * @deprecated ICU 2.8 Use getSortKey(...) instead 434 */ 435 virtual CollationKey& getCollationKey(const UChar *source, 436 int32_t sourceLength, 437 CollationKey& key, 438 UErrorCode& status) const; 439 440 /** 441 * Generates the hash code for the rule-based collation object. 442 * @return the hash code. 
443 * @stable ICU 2.0 444 */ 445 virtual int32_t hashCode(void) const; 446 447 /** 448 * Gets the locale of the Collator 449 * @param type can be either requested, valid or actual locale. For more 450 * information see the definition of ULocDataLocaleType in 451 * uloc.h 452 * @param status the error code status. 453 * @return locale where the collation data lives. If the collator 454 * was instantiated from rules, locale is empty. 455 * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback 456 */ 457 virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; 458 459 /** 460 * Gets the table-based rules for the collation object. 461 * @return returns the collation rules that the table collation object was 462 * created from. 463 * @stable ICU 2.0 464 */ 465 const UnicodeString& getRules(void) const; 466 467 /** 468 * Gets the version information for a Collator. 469 * @param info the version # information, the result will be filled in 470 * @stable ICU 2.0 471 */ 472 virtual void getVersion(UVersionInfo info) const; 473 474 /** 475 * Return the maximum length of any expansion sequences that end with the 476 * specified comparison order. 477 * @param order a collation order returned by previous or next. 478 * @return maximum size of the expansion sequences ending with the collation 479 * element or 1 if collation element does not occur at the end of 480 * any expansion sequence 481 * @see CollationElementIterator#getMaxExpansion 482 * @stable ICU 2.0 483 */ 484 int32_t getMaxExpansion(int32_t order) const; 485 486 /** 487 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This 488 * method is to implement a simple version of RTTI, since not all C++ 489 * compilers support genuine RTTI. Polymorphic operator==() and clone() 490 * methods call this method. 491 * @return The class ID for this object. All objects of a given class have 492 * the same class ID. Objects of other classes have different class 493 * IDs. 
494 * @stable ICU 2.0 495 */ 496 virtual UClassID getDynamicClassID(void) const; 497 498 /** 499 * Returns the class ID for this class. This is useful only for comparing to 500 * a return value from getDynamicClassID(). For example: 501 * <pre> 502 * Base* polymorphic_pointer = createPolymorphicObject(); 503 * if (polymorphic_pointer->getDynamicClassID() == 504 * Derived::getStaticClassID()) ... 505 * </pre> 506 * @return The class ID for all objects of this class. 507 * @stable ICU 2.0 508 */ 509 static UClassID U_EXPORT2 getStaticClassID(void); 510 511 /** 512 * Returns the binary format of the class's rules. The format is that of 513 * .col files. 514 * @param length Returns the length of the data, in bytes 515 * @param status the error code status. 516 * @return memory, owned by the caller, of size 'length' bytes. 517 * @stable ICU 2.2 518 */ 519 uint8_t *cloneRuleData(int32_t &length, UErrorCode &status); 520 521 522 /** Creates a binary image of a collator. This binary image can be stored and 523 * later used to instantiate a collator using ucol_openBinary. 524 * This API supports preflighting. 525 * @param buffer a fill-in buffer to receive the binary image 526 * @param capacity capacity of the destination buffer 527 * @param status for catching errors 528 * @return size of the image 529 * @see ucol_openBinary 530 * @stable ICU 3.4 531 */ 532 int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status); 533 534 /** 535 * Returns current rules. Delta defines whether full rules are returned or 536 * just the tailoring. 537 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 
538 * @param buffer UnicodeString to store the result rules 539 * @stable ICU 2.2 540 */ 541 void getRules(UColRuleOption delta, UnicodeString &buffer); 542 543 /** 544 * Universal attribute setter 545 * @param attr attribute type 546 * @param value attribute value 547 * @param status to indicate whether the operation went on smoothly or there were errors 548 * @stable ICU 2.2 549 */ 550 virtual void setAttribute(UColAttribute attr, UColAttributeValue value, 551 UErrorCode &status); 552 553 /** 554 * Universal attribute getter. 555 * @param attr attribute type 556 * @param status to indicate whether the operation went on smoothly or there were errors 557 * @return attribute value 558 * @stable ICU 2.2 559 */ 560 virtual UColAttributeValue getAttribute(UColAttribute attr, 561 UErrorCode &status); 562 563 /** 564 * Sets the variable top to a collation element value of a string supplied. 565 * @param varTop one or more (if contraction) UChars to which the variable top should be set 566 * @param len length of variable top string. If -1 it is considered to be zero terminated. 567 * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br> 568 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br> 569 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 570 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined 571 * @stable ICU 2.0 572 */ 573 virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status); 574 575 /** 576 * Sets the variable top to a collation element value of a string supplied. 577 * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set 578 * @param status error code. If error code is set, the return value is undefined. 
Errors set by this function are: <br> 579 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such a contraction<br> 580 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 581 * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined 582 * @stable ICU 2.0 583 */ 584 virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status); 585 586 /** 587 * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits. 588 * Lower 16 bits are ignored. 589 * @param varTop CE value, as returned by setVariableTop or ucol)getVariableTop 590 * @param status error code (not changed by function) 591 * @stable ICU 2.0 592 */ 593 virtual void setVariableTop(const uint32_t varTop, UErrorCode &status); 594 595 /** 596 * Gets the variable top value of a Collator. 597 * Lower 16 bits are undefined and should be ignored. 598 * @param status error code (not changed by function). If error code is set, the return value is undefined. 599 * @stable ICU 2.0 600 */ 601 virtual uint32_t getVariableTop(UErrorCode &status) const; 602 603 /** 604 * Get an UnicodeSet that contains all the characters and sequences tailored in 605 * this collator. 606 * @param status error code of the operation 607 * @return a pointer to a UnicodeSet object containing all the 608 * code points and sequences that may sort differently than 609 * in the UCA. The object must be disposed of by using delete 610 * @stable ICU 2.4 611 */ 612 virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; 613 614 /** 615 * Thread safe cloning operation. 616 * @return pointer to the new clone, user should remove it. 617 * @stable ICU 2.2 618 */ 619 virtual Collator* safeClone(void); 620 621 /** 622 * Get the sort key as an array of bytes from an UnicodeString. 623 * @param source string to be processed. 624 * @param result buffer to store result in. 
If NULL, number of bytes needed 625 * will be returned. 626 * @param resultLength length of the result buffer. If if not enough the 627 * buffer will be filled to capacity. 628 * @return Number of bytes needed for storing the sort key 629 * @stable ICU 2.0 630 */ 631 virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result, 632 int32_t resultLength) const; 633 634 /** 635 * Get the sort key as an array of bytes from an UChar buffer. 636 * @param source string to be processed. 637 * @param sourceLength length of string to be processed. If -1, the string 638 * is 0 terminated and length will be decided by the function. 639 * @param result buffer to store result in. If NULL, number of bytes needed 640 * will be returned. 641 * @param resultLength length of the result buffer. If if not enough the 642 * buffer will be filled to capacity. 643 * @return Number of bytes needed for storing the sort key 644 * @stable ICU 2.2 645 */ 646 virtual int32_t getSortKey(const UChar *source, int32_t sourceLength, 647 uint8_t *result, int32_t resultLength) const; 648 649 /** 650 * Determines the minimum strength that will be use in comparison or 651 * transformation. 652 * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored 653 * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference 654 * are ignored. 655 * @return the current comparison level. 656 * @see RuleBasedCollator#setStrength 657 * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead 658 */ 659 virtual ECollationStrength getStrength(void) const; 660 661 /** 662 * Sets the minimum strength to be used in comparison or transformation. 663 * @see RuleBasedCollator#getStrength 664 * @param newStrength the new comparison level. 665 * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead 666 */ 667 virtual void setStrength(ECollationStrength newStrength); 668 669 /** 670 * Retrieves the reordering codes for this collator. 
671 * @param dest The array to fill with the script ordering. 672 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 673 * will only return the length of the result without writing any of the result string (pre-flighting). 674 * @param status A reference to an error code value, which must not indicate 675 * a failure before the function call. 676 * @return The length of the script ordering array. 677 * @see ucol_setReorderCodes 678 * @see Collator#getEquivalentReorderCodes 679 * @see Collator#setReorderCodes 680 * @draft ICU 4.8 681 */ 682 virtual int32_t U_EXPORT2 getReorderCodes(int32_t *dest, 683 int32_t destCapacity, 684 UErrorCode& status) const; 685 686 /** 687 * Sets the ordering of scripts for this collator. 688 * @param reorderCodes An array of script codes in the new order. This can be NULL if the 689 * length is also set to 0. An empty array will clear any reordering codes on the collator. 690 * @param reorderCodesLength The length of reorderCodes. 691 * @param status error code 692 * @see Collator#getReorderCodes 693 * @see Collator#getEquivalentReorderCodes 694 * @draft ICU 4.8 695 */ 696 virtual void U_EXPORT2 setReorderCodes(const int32_t* reorderCodes, 697 int32_t reorderCodesLength, 698 UErrorCode& status) ; 699 700 /** 701 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 702 * codes will be grouped and must reorder together. 703 * @param reorderCode The reorder code to determine equivalence for. 704 * @param dest The array to fill with the script equivalene reordering codes. 705 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the 706 * function will only return the length of the result without writing any of the result 707 * string (pre-flighting). 708 * @param status A reference to an error code value, which must not indicate 709 * a failure before the function call. 
710 * @return The length of the of the reordering code equivalence array. 711 * @see ucol_setReorderCodes 712 * @see Collator#getReorderCodes 713 * @see Collator#setReorderCodes 714 * @draft ICU 4.8 715 */ 716 static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode, 717 int32_t* dest, 718 int32_t destCapacity, 719 UErrorCode& status); 720 721 722 private: 723 724 // private static constants ----------------------------------------------- 725 726 enum { 727 /* need look up in .commit() */ 728 CHARINDEX = 0x70000000, 729 /* Expand index follows */ 730 EXPANDCHARINDEX = 0x7E000000, 731 /* contract indexes follows */ 732 CONTRACTCHARINDEX = 0x7F000000, 733 /* unmapped character values */ 734 UNMAPPED = 0xFFFFFFFF, 735 /* primary strength increment */ 736 PRIMARYORDERINCREMENT = 0x00010000, 737 /* secondary strength increment */ 738 SECONDARYORDERINCREMENT = 0x00000100, 739 /* tertiary strength increment */ 740 TERTIARYORDERINCREMENT = 0x00000001, 741 /* mask off anything but primary order */ 742 PRIMARYORDERMASK = 0xffff0000, 743 /* mask off anything but secondary order */ 744 SECONDARYORDERMASK = 0x0000ff00, 745 /* mask off anything but tertiary order */ 746 TERTIARYORDERMASK = 0x000000ff, 747 /* mask off ignorable char order */ 748 IGNORABLEMASK = 0x0000ffff, 749 /* use only the primary difference */ 750 PRIMARYDIFFERENCEONLY = 0xffff0000, 751 /* use only the primary and secondary difference */ 752 SECONDARYDIFFERENCEONLY = 0xffffff00, 753 /* primary order shift */ 754 PRIMARYORDERSHIFT = 16, 755 /* secondary order shift */ 756 SECONDARYORDERSHIFT = 8, 757 /* starting value for collation elements */ 758 COLELEMENTSTART = 0x02020202, 759 /* testing mask for primary low element */ 760 PRIMARYLOWZEROMASK = 0x00FF0000, 761 /* reseting value for secondaries and tertiaries */ 762 RESETSECONDARYTERTIARY = 0x00000202, 763 /* reseting value for tertiaries */ 764 RESETTERTIARY = 0x00000002, 765 766 PRIMIGNORABLE = 0x0202 767 }; 768 769 // private data members 
--------------------------------------------------- 770 771 UBool dataIsOwned; 772 773 UBool isWriteThroughAlias; 774 775 /** 776 * c struct for collation. All initialisation for it has to be done through 777 * setUCollator(). 778 */ 779 UCollator *ucollator; 780 781 /** 782 * Rule UnicodeString 783 */ 784 UnicodeString urulestring; 785 786 // friend classes -------------------------------------------------------- 787 788 /** 789 * Used to iterate over collation elements in a character source. 790 */ 791 friend class CollationElementIterator; 792 793 /** 794 * Collator ONLY needs access to RuleBasedCollator(const Locale&, 795 * UErrorCode&) 796 */ 797 friend class Collator; 798 799 /** 800 * Searching over collation elements in a character source 801 */ 802 friend class StringSearch; 803 804 // private constructors -------------------------------------------------- 805 806 /** 807 * Default constructor 808 */ 809 RuleBasedCollator(); 810 811 /** 812 * RuleBasedCollator constructor. This constructor takes a locale. The 813 * only caller of this class should be Collator::createInstance(). If 814 * createInstance() happens to know that the requested locale's collation is 815 * implemented as a RuleBasedCollator, it can then call this constructor. 816 * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID 817 * COLLATION TABLE. It does this by falling back to defaults. 818 * @param desiredLocale locale used 819 * @param status error code status 820 */ 821 RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status); 822 823 /** 824 * common constructor implementation 825 * 826 * @param rules the collation rules to build the collation table from. 827 * @param collationStrength default strength for comparison 828 * @param decompositionMode the normalisation mode 829 * @param status reporting a success or an error. 
*/
    void
    construct(const UnicodeString& rules,
              UColAttributeValue collationStrength,
              UColAttributeValue decompositionMode,
              UErrorCode& status);

    // private methods -------------------------------------------------------

    /**
    * Creates the c struct for ucollator. The inline definition forwards
    * locale.getName() to the const char* overload.
    * @param locale desired locale
    * @param status error status
    */
    void setUCollator(const Locale& locale, UErrorCode& status);

    /**
    * Creates the c struct for ucollator
    * @param locale desired locale name
    * @param status error status
    */
    void setUCollator(const char* locale, UErrorCode& status);

    /**
    * Creates the c struct for ucollator. This is used internally by
    * StringSearch. Hence the responsibility of cleaning up the ucollator is
    * not done by this RuleBasedCollator. The dataIsOwned flag is set to FALSE.
    * @param collator new ucollator data
    */
    void setUCollator(UCollator *collator);

public:
    /**
    * Get UCollator data struct. Used only by StringSearch & intltest.
    * @return UCollator data struct
    * @internal
    */
    const UCollator * getUCollator();

protected:
   /**
    * Used internally by registration to define the requested and valid locales.
    * @param requestedLocale the requested locale
    * @param validLocale the valid locale
    * @param actualLocale the actual locale
    * @internal
    */
    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);

private:

    // if not owned and not a write through alias, copy the ucollator
    void checkOwned(void);

    // utility to init rule string used by checkOwned and construct
    void setRuleStringFromCollator();

    /**
    * Converts C's UCollationResult to EComparisonResult
    * @param result member of the enum UCollationResult
    * @return EComparisonResult equivalent of UCollationResult
    * @deprecated ICU 2.6.
We will not need it. 892 */ 893 Collator::EComparisonResult getEComparisonResult( 894 const UCollationResult &result) const; 895 896 /** 897 * Converts C's UCollationStrength to ECollationStrength 898 * @param strength member of the enum UCollationStrength 899 * @return ECollationStrength equivalent of UCollationStrength 900 */ 901 Collator::ECollationStrength getECollationStrength( 902 const UCollationStrength &strength) const; 903 904 /** 905 * Converts C++'s ECollationStrength to UCollationStrength 906 * @param strength member of the enum ECollationStrength 907 * @return UCollationStrength equivalent of ECollationStrength 908 */ 909 UCollationStrength getUCollationStrength( 910 const Collator::ECollationStrength &strength) const; 911 }; 912 913 // inline method implementation --------------------------------------------- 914 915 inline void RuleBasedCollator::setUCollator(const Locale &locale, 916 UErrorCode &status) 917 { 918 setUCollator(locale.getName(), status); 919 } 920 921 922 inline void RuleBasedCollator::setUCollator(UCollator *collator) 923 { 924 925 if (ucollator && dataIsOwned) { 926 ucol_close(ucollator); 927 } 928 ucollator = collator; 929 dataIsOwned = FALSE; 930 isWriteThroughAlias = TRUE; 931 setRuleStringFromCollator(); 932 } 933 934 inline const UCollator * RuleBasedCollator::getUCollator() 935 { 936 return ucollator; 937 } 938 939 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult( 940 const UCollationResult &result) const 941 { 942 switch (result) 943 { 944 case UCOL_LESS : 945 return Collator::LESS; 946 case UCOL_EQUAL : 947 return Collator::EQUAL; 948 default : 949 return Collator::GREATER; 950 } 951 } 952 953 inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength( 954 const UCollationStrength &strength) const 955 { 956 switch (strength) 957 { 958 case UCOL_PRIMARY : 959 return Collator::PRIMARY; 960 case UCOL_SECONDARY : 961 return Collator::SECONDARY; 962 case UCOL_TERTIARY : 963 return 
Collator::TERTIARY; 964 case UCOL_QUATERNARY : 965 return Collator::QUATERNARY; 966 default : 967 return Collator::IDENTICAL; 968 } 969 } 970 971 inline UCollationStrength RuleBasedCollator::getUCollationStrength( 972 const Collator::ECollationStrength &strength) const 973 { 974 switch (strength) 975 { 976 case Collator::PRIMARY : 977 return UCOL_PRIMARY; 978 case Collator::SECONDARY : 979 return UCOL_SECONDARY; 980 case Collator::TERTIARY : 981 return UCOL_TERTIARY; 982 case Collator::QUATERNARY : 983 return UCOL_QUATERNARY; 984 default : 985 return UCOL_IDENTICAL; 986 } 987 } 988 989 U_NAMESPACE_END 990 991 #endif /* #if !UCONFIG_NO_COLLATION */ 992 993 #endif 994