Home | History | Annotate | Download | only in i18n
      1 /*
      2  ******************************************************************************
      3  * Copyright (C) 1996-2011, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  ******************************************************************************
      6  */
      7 
      8 /**
      9  * File tblcoll.cpp
     10  *
     11  * Created by: Helena Shih
     12  *
     13  * Modification History:
     14  *
     15  *  Date        Name        Description
     16  *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
     17  *                          constructor which reads RuleBasedCollator object from
     18  *                          a binary file.  Added writeToFile method which streams
     19  *                          RuleBasedCollator out to a binary file.  The streamIn
     20  *                          and streamOut methods use istream and ostream objects
     21  *                          in binary mode.
     22  *  2/11/97     aliu        Moved declarations out of for loop initializer.
     23  *                          Added Mac compatibility #ifdef for ios::nocreate.
     24  *  2/12/97     aliu        Modified to use TableCollationData sub-object to
     25  *                          hold invariant data.
     26  *  2/13/97     aliu        Moved several methods into this class from Collation.
     27  *                          Added a private RuleBasedCollator(Locale&) constructor,
     28  *                          to be used by Collator::getInstance().  General
     29  *                          clean up.  Made use of UErrorCode variables consistent.
     30  *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
     31  *                          constructor and getDynamicClassID.
     32  *  3/5/97      aliu        Changed compaction cycle to improve performance.  We
     33  *                          use the maximum allowable value which is kBlockCount.
     34  *                          Modified getRules() to load rules dynamically.  Changed
     35  *                          constructFromFile() call to accomodate this (added
     36  *                          parameter to specify whether binary loading is to
     37  *                          take place).
     38  * 05/06/97     helena      Added memory allocation error check.
     39  *  6/20/97     helena      Java class name change.
     40  *  6/23/97     helena      Adding comments to make code more readable.
     41  * 09/03/97     helena      Added createCollationKeyValues().
     42  * 06/26/98     erm         Changes for CollationKeys using byte arrays.
     43  * 08/10/98     erm         Synched with 1.2 version of RuleBasedCollator.java
     44  * 04/23/99     stephen     Removed EDecompositionMode, merged with
     45  *                          Normalizer::EMode
     46  * 06/14/99     stephen     Removed kResourceBundleSuffix
     47  * 06/22/99     stephen     Fixed logic in constructFromFile() since .ctx
     48  *                          files are no longer used.
     49  * 11/02/99     helena      Collator performance enhancements.  Special case
     50  *                          for NO_OP situations.
     51  * 11/17/99     srl         More performance enhancements. Inlined some internal functions.
     52  * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
     53  *                          to implementation file.
     54  * 01/29/01     synwee      Modified into a C++ wrapper calling C APIs (ucol.h)
     55  */
     56 
     57 #include <typeinfo>  // for 'typeid' to work
     58 
     59 #include "unicode/utypes.h"
     60 
     61 #if !UCONFIG_NO_COLLATION
     62 
     63 #include "unicode/tblcoll.h"
     64 #include "unicode/coleitr.h"
     65 #include "unicode/ures.h"
     66 #include "unicode/uset.h"
     67 #include "ucol_imp.h"
     68 #include "uresimp.h"
     69 #include "uhash.h"
     70 #include "cmemory.h"
     71 #include "cstring.h"
     72 #include "putilimp.h"
     73 
     74 /* public RuleBasedCollator constructor ---------------------------------- */
     75 
     76 U_NAMESPACE_BEGIN
     77 
     78 /**
     79 * Copy constructor, aliasing, not write-through
     80 */
     81 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that)
     82 : Collator(that)
     83 , dataIsOwned(FALSE)
     84 , isWriteThroughAlias(FALSE)
     85 , ucollator(NULL)
     86 {
     87     RuleBasedCollator::operator=(that);
     88 }
     89 
     90 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
     91                                      UErrorCode& status) :
     92 dataIsOwned(FALSE)
     93 {
     94     construct(rules,
     95         UCOL_DEFAULT_STRENGTH,
     96         UCOL_DEFAULT,
     97         status);
     98 }
     99 
    100 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
    101                                      ECollationStrength collationStrength,
    102                                      UErrorCode& status) : dataIsOwned(FALSE)
    103 {
    104     construct(rules,
    105         getUCollationStrength(collationStrength),
    106         UCOL_DEFAULT,
    107         status);
    108 }
    109 
    110 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
    111                                      UColAttributeValue decompositionMode,
    112                                      UErrorCode& status) :
    113 dataIsOwned(FALSE)
    114 {
    115     construct(rules,
    116         UCOL_DEFAULT_STRENGTH,
    117         decompositionMode,
    118         status);
    119 }
    120 
    121 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules,
    122                                      ECollationStrength collationStrength,
    123                                      UColAttributeValue decompositionMode,
    124                                      UErrorCode& status) : dataIsOwned(FALSE)
    125 {
    126     construct(rules,
    127         getUCollationStrength(collationStrength),
    128         decompositionMode,
    129         status);
    130 }
    131 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
    132                     const RuleBasedCollator *base,
    133                     UErrorCode &status) :
    134 dataIsOwned(TRUE),
    135 isWriteThroughAlias(FALSE)
    136 {
    137   ucollator = ucol_openBinary(bin, length, base->ucollator, &status);
    138 }
    139 
    140 void
    141 RuleBasedCollator::setRuleStringFromCollator()
    142 {
    143     int32_t length;
    144     const UChar *r = ucol_getRules(ucollator, &length);
    145 
    146     if (r && length > 0) {
    147         // alias the rules string
    148         urulestring.setTo(TRUE, r, length);
    149     }
    150     else {
    151         urulestring.truncate(0); // Clear string.
    152     }
    153 }
    154 
    155 // not aliasing, not write-through
    156 void
    157 RuleBasedCollator::construct(const UnicodeString& rules,
    158                              UColAttributeValue collationStrength,
    159                              UColAttributeValue decompositionMode,
    160                              UErrorCode& status)
    161 {
    162     ucollator = ucol_openRules(rules.getBuffer(), rules.length(),
    163         decompositionMode, collationStrength,
    164         NULL, &status);
    165 
    166     dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it
    167     isWriteThroughAlias = FALSE;
    168 
    169     if(ucollator == NULL) {
    170         if(U_SUCCESS(status)) {
    171             status = U_MEMORY_ALLOCATION_ERROR;
    172         }
    173         return; // Failure
    174     }
    175 
    176     setRuleStringFromCollator();
    177 }
    178 
    179 /* RuleBasedCollator public destructor ----------------------------------- */
    180 
    181 RuleBasedCollator::~RuleBasedCollator()
    182 {
    183     if (dataIsOwned)
    184     {
    185         ucol_close(ucollator);
    186     }
    187     ucollator = 0;
    188 }
    189 
    190 /* RuleBaseCollator public methods --------------------------------------- */
    191 
    192 UBool RuleBasedCollator::operator==(const Collator& that) const
    193 {
    194   /* only checks for address equals here */
    195   if (Collator::operator==(that))
    196     return TRUE;
    197 
    198   if (typeid(*this) != typeid(that))
    199     return FALSE;  /* not the same class */
    200 
    201   RuleBasedCollator& thatAlias = (RuleBasedCollator&)that;
    202 
    203   // weiv: use C function, commented code below is wrong
    204   return ucol_equals(this->ucollator, thatAlias.ucollator);
    205   /*
    206   synwee : orginal code does not check for data compatibility
    207   */
    208   /*
    209   if (ucollator != thatAlias.ucollator)
    210     return FALSE;
    211 
    212   return TRUE;
    213   */
    214 }
    215 
    216 UBool RuleBasedCollator::operator!=(const Collator& other) const
    217 {
    218     return !(*this == other);
    219 }
    220 
    221 // aliasing, not write-through
    222 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that)
    223 {
    224     if (this != &that)
    225     {
    226         if (dataIsOwned)
    227         {
    228             ucol_close(ucollator);
    229         }
    230 
    231         urulestring.truncate(0); // empty the rule string
    232         dataIsOwned = TRUE;
    233         isWriteThroughAlias = FALSE;
    234 
    235         UErrorCode intStatus = U_ZERO_ERROR;
    236         int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
    237         ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize,
    238                                         &intStatus);
    239         if (U_SUCCESS(intStatus)) {
    240             setRuleStringFromCollator();
    241         }
    242     }
    243     return *this;
    244 }
    245 
    246 // aliasing, not write-through
    247 Collator* RuleBasedCollator::clone() const
    248 {
    249     return new RuleBasedCollator(*this);
    250 }
    251 
    252 
    253 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
    254                                            (const UnicodeString& source) const
    255 {
    256     UErrorCode status = U_ZERO_ERROR;
    257     CollationElementIterator *result = new CollationElementIterator(source, this,
    258                                                                     status);
    259     if (U_FAILURE(status)) {
    260         delete result;
    261         return NULL;
    262     }
    263 
    264     return result;
    265 }
    266 
    267 /**
    268 * Create a CollationElementIterator object that will iterate over the
    269 * elements in a string, using the collation rules defined in this
    270 * RuleBasedCollator
    271 */
    272 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
    273                                        (const CharacterIterator& source) const
    274 {
    275     UErrorCode status = U_ZERO_ERROR;
    276     CollationElementIterator *result = new CollationElementIterator(source, this,
    277                                                                     status);
    278 
    279     if (U_FAILURE(status)) {
    280         delete result;
    281         return NULL;
    282     }
    283 
    284     return result;
    285 }
    286 
    287 /**
    288 * Return a string representation of this collator's rules. The string can
    289 * later be passed to the constructor that takes a UnicodeString argument,
    290 * which will construct a collator that's functionally identical to this one.
    291 * You can also allow users to edit the string in order to change the collation
    292 * data, or you can print it out for inspection, or whatever.
    293 */
    294 const UnicodeString& RuleBasedCollator::getRules() const
    295 {
    296     return urulestring;
    297 }
    298 
    299 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer)
    300 {
    301     int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1);
    302 
    303     if (rulesize > 0) {
    304         UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) );
    305         if(rules != NULL) {
    306             ucol_getRulesEx(ucollator, delta, rules, rulesize);
    307             buffer.setTo(rules, rulesize);
    308             uprv_free(rules);
    309         } else { // couldn't allocate
    310             buffer.remove();
    311         }
    312     }
    313     else {
    314         buffer.remove();
    315     }
    316 }
    317 
    318 UnicodeSet *
    319 RuleBasedCollator::getTailoredSet(UErrorCode &status) const
    320 {
    321     if(U_FAILURE(status)) {
    322         return NULL;
    323     }
    324     return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status);
    325 }
    326 
    327 
    328 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const
    329 {
    330     if (versionInfo!=NULL){
    331         ucol_getVersion(ucollator, versionInfo);
    332     }
    333 }
    334 
    335 Collator::EComparisonResult RuleBasedCollator::compare(
    336                                                const UnicodeString& source,
    337                                                const UnicodeString& target,
    338                                                int32_t length) const
    339 {
    340     UErrorCode status = U_ZERO_ERROR;
    341     return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status));
    342 }
    343 
    344 UCollationResult RuleBasedCollator::compare(
    345                                                const UnicodeString& source,
    346                                                const UnicodeString& target,
    347                                                int32_t length,
    348                                                UErrorCode &status) const
    349 {
    350     return compare(source.getBuffer(), uprv_min(length,source.length()), target.getBuffer(), uprv_min(length,target.length()), status);
    351 }
    352 
    353 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source,
    354                                                        int32_t sourceLength,
    355                                                        const UChar* target,
    356                                                        int32_t targetLength)
    357                                                        const
    358 {
    359     return  getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength,
    360                                                          target, targetLength));
    361 }
    362 
    363 UCollationResult RuleBasedCollator::compare(const UChar* source,
    364                                                        int32_t sourceLength,
    365                                                        const UChar* target,
    366                                                        int32_t targetLength,
    367                                                        UErrorCode &status) const
    368 {
    369     if(U_SUCCESS(status)) {
    370         return  ucol_strcoll(ucollator, source, sourceLength, target, targetLength);
    371     } else {
    372         return UCOL_EQUAL;
    373     }
    374 }
    375 
    376 /**
    377 * Compare two strings using this collator
    378 */
    379 Collator::EComparisonResult RuleBasedCollator::compare(
    380                                              const UnicodeString& source,
    381                                              const UnicodeString& target) const
    382 {
    383     return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), source.length(),
    384                                                         target.getBuffer(), target.length()));
    385 }
    386 
    387 UCollationResult RuleBasedCollator::compare(
    388                                              const UnicodeString& source,
    389                                              const UnicodeString& target,
    390                                              UErrorCode &status) const
    391 {
    392     if(U_SUCCESS(status)) {
    393         return ucol_strcoll(ucollator, source.getBuffer(), source.length(),
    394                                        target.getBuffer(), target.length());
    395     } else {
    396         return UCOL_EQUAL;
    397     }
    398 }
    399 
    400 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter,
    401                                             UCharIterator &tIter,
    402                                             UErrorCode &status) const {
    403     if(U_SUCCESS(status)) {
    404         return ucol_strcollIter(ucollator, &sIter, &tIter, &status);
    405     } else {
    406         return UCOL_EQUAL;
    407     }
    408 }
    409 
    410 /**
    411 * Retrieve a collation key for the specified string. The key can be compared
    412 * with other collation keys using a bitwise comparison (e.g. memcmp) to find
    413 * the ordering of their respective source strings. This is handy when doing a
    414 * sort, where each sort key must be compared many times.
    415 *
    416 * The basic algorithm here is to find all of the collation elements for each
    417 * character in the source string, convert them to an ASCII representation, and
    418 * put them into the collation key.  But it's trickier than that. Each
    419 * collation element in a string has three components: primary ('A' vs 'B'),
    420 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary difference
    421 * at the end of a string takes precedence over a secondary or tertiary
    422 * difference earlier in the string.
    423 *
    424 * To account for this, we put all of the primary orders at the beginning of
    425 * the string, followed by the secondary and tertiary orders. Each set of
* orders is terminated by nulls so that a key for a string which is an initial
    427 * substring of another key will compare less without any special case.
    428 *
    429 * Here's a hypothetical example, with the collation element represented as a
    430 * three-digit number, one digit for primary, one for secondary, etc.
    431 *
    432 * String:              A     a     B    \u00C9
    433 * Collation Elements: 101   100   201  511
    434 * Collation Key:      1125<null>0001<null>1011<null>
    435 *
    436 * To make things even trickier, secondary differences (accent marks) are
    437 * compared starting at the *end* of the string in languages with French
    438 * secondary ordering. But when comparing the accent marks on a single base
    439 * character, they are compared from the beginning. To handle this, we reverse
    440 * all of the accents that belong to each base character, then we reverse the
    441 * entire string of secondary orderings at the end.
    442 */
    443 CollationKey& RuleBasedCollator::getCollationKey(
    444                                                   const UnicodeString& source,
    445                                                   CollationKey& sortkey,
    446                                                   UErrorCode& status) const
    447 {
    448     return getCollationKey(source.getBuffer(), source.length(), sortkey, status);
    449 }
    450 
    451 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source,
    452                                                     int32_t sourceLen,
    453                                                     CollationKey& sortkey,
    454                                                     UErrorCode& status) const
    455 {
    456     if (U_FAILURE(status)) {
    457         return sortkey.setToBogus();
    458     }
    459     if (sourceLen < -1 || (source == NULL && sourceLen != 0)) {
    460         status = U_ILLEGAL_ARGUMENT_ERROR;
    461         return sortkey.setToBogus();
    462     }
    463 
    464     if (sourceLen < 0) {
    465         sourceLen = u_strlen(source);
    466     }
    467     if (sourceLen == 0) {
    468         return sortkey.reset();
    469     }
    470 
    471     uint8_t *result;
    472     int32_t resultCapacity;
    473     if (sortkey.fCapacity >= (sourceLen * 3)) {
    474         // Try to reuse the CollationKey.fBytes.
    475         result = sortkey.fBytes;
    476         resultCapacity = sortkey.fCapacity;
    477     } else {
    478         result = NULL;
    479         resultCapacity = 0;
    480     }
    481     int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, source, sourceLen,
    482                                                       result, resultCapacity, &status);
    483 
    484     if (U_SUCCESS(status)) {
    485         if (result == sortkey.fBytes) {
    486             sortkey.setLength(resultLen);
    487         } else {
    488             sortkey.adopt(result, resultCapacity, resultLen);
    489         }
    490     } else {
    491         if (result != sortkey.fBytes) {
    492             uprv_free(result);
    493         }
    494         sortkey.setToBogus();
    495     }
    496     return sortkey;
    497 }
    498 
    499 /**
    500  * Return the maximum length of any expansion sequences that end with the
    501  * specified comparison order.
    502  * @param order a collation order returned by previous or next.
    503  * @return the maximum length of any expansion seuences ending with the
    504  *         specified order or 1 if collation order does not occur at the end of any
    505  *         expansion sequence.
    506  * @see CollationElementIterator#getMaxExpansion
    507  */
    508 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const
    509 {
    510     uint8_t result;
    511     UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result);
    512     return result;
    513 }
    514 
    515 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length,
    516                                               UErrorCode &status)
    517 {
    518     return ucol_cloneRuleData(ucollator, &length, &status);
    519 }
    520 
    521 
    522 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status)
    523 {
    524   return ucol_cloneBinary(ucollator, buffer, capacity, &status);
    525 }
    526 
    527 void RuleBasedCollator::setAttribute(UColAttribute attr,
    528                                      UColAttributeValue value,
    529                                      UErrorCode &status)
    530 {
    531     if (U_FAILURE(status))
    532         return;
    533     checkOwned();
    534     ucol_setAttribute(ucollator, attr, value, &status);
    535 }
    536 
    537 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr,
    538                                                       UErrorCode &status)
    539 {
    540     if (U_FAILURE(status))
    541         return UCOL_DEFAULT;
    542     return ucol_getAttribute(ucollator, attr, &status);
    543 }
    544 
    545 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) {
    546     checkOwned();
    547     return ucol_setVariableTop(ucollator, varTop, len, &status);
    548 }
    549 
    550 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCode &status) {
    551     checkOwned();
    552     return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &status);
    553 }
    554 
    555 void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) {
    556     checkOwned();
    557     ucol_restoreVariableTop(ucollator, varTop, &status);
    558 }
    559 
    560 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const {
    561   return ucol_getVariableTop(ucollator, &status);
    562 }
    563 
    564 Collator* RuleBasedCollator::safeClone(void)
    565 {
    566     UErrorCode intStatus = U_ZERO_ERROR;
    567     int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE;
    568     UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize,
    569                                     &intStatus);
    570     if (U_FAILURE(intStatus)) {
    571         return NULL;
    572     }
    573 
    574     RuleBasedCollator *result = new RuleBasedCollator();
    575     // Null pointer check
    576     if (result != NULL) {
    577 	    result->ucollator = ucol;
    578 	    result->dataIsOwned = TRUE;
    579 	    result->isWriteThroughAlias = FALSE;
    580 	    setRuleStringFromCollator();
    581     }
    582 
    583     return result;
    584 }
    585 
    586 
    587 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source,
    588                                          uint8_t *result, int32_t resultLength)
    589                                          const
    590 {
    591     return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), result, resultLength);
    592 }
    593 
    594 int32_t RuleBasedCollator::getSortKey(const UChar *source,
    595                                          int32_t sourceLength, uint8_t *result,
    596                                          int32_t resultLength) const
    597 {
    598     return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength);
    599 }
    600 
    601 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const
    602 {
    603     UErrorCode intStatus = U_ZERO_ERROR;
    604     return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH,
    605                                 &intStatus));
    606 }
    607 
    608 void RuleBasedCollator::setStrength(ECollationStrength newStrength)
    609 {
    610     checkOwned();
    611     UErrorCode intStatus = U_ZERO_ERROR;
    612     UCollationStrength strength = getUCollationStrength(newStrength);
    613     ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus);
    614 }
    615 
    616 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest,
    617                                           int32_t destCapacity,
    618                                           UErrorCode& status) const
    619 {
    620     return ucol_getReorderCodes(ucollator, dest, destCapacity, &status);
    621 }
    622 
    623 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes,
    624                                        int32_t reorderCodesLength,
    625                                        UErrorCode& status)
    626 {
    627     checkOwned();
    628     ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status);
    629 }
    630 
    631 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode,
    632                                 int32_t* dest,
    633                                 int32_t destCapacity,
    634                                 UErrorCode& status)
    635 {
    636     return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &status);
    637 }
    638 
    639 /**
    640 * Create a hash code for this collation. Just hash the main rule table -- that
    641 * should be good enough for almost any use.
    642 */
    643 int32_t RuleBasedCollator::hashCode() const
    644 {
    645     int32_t length;
    646     const UChar *rules = ucol_getRules(ucollator, &length);
    647     return uhash_hashUCharsN(rules, length);
    648 }
    649 
    650 /**
    651 * return the locale of this collator
    652 */
    653 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status) const {
    654     const char *result = ucol_getLocaleByType(ucollator, type, &status);
    655     if(result == NULL) {
    656         Locale res("");
    657         res.setToBogus();
    658         return res;
    659     } else {
    660         return Locale(result);
    661     }
    662 }
    663 
    664 void
    665 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) {
    666     checkOwned();
    667     char* rloc  = uprv_strdup(requestedLocale.getName());
    668     if (rloc) {
    669         char* vloc = uprv_strdup(validLocale.getName());
    670         if (vloc) {
    671             char* aloc = uprv_strdup(actualLocale.getName());
    672             if (aloc) {
    673                 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc);
    674                 return;
    675             }
    676             uprv_free(vloc);
    677         }
    678         uprv_free(rloc);
    679     }
    680 }
    681 
    682 // RuleBaseCollatorNew private constructor ----------------------------------
    683 
    684 RuleBasedCollator::RuleBasedCollator()
    685   : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
    686 {
    687 }
    688 
    689 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale,
    690                                            UErrorCode& status)
    691  : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL)
    692 {
    693     if (U_FAILURE(status))
    694         return;
    695 
    696     /*
    697     Try to load, in order:
    698      1. The desired locale's collation.
    699      2. A fallback of the desired locale.
    700      3. The default locale's collation.
    701      4. A fallback of the default locale.
    702      5. The default collation rules, which contains en_US collation rules.
    703 
    704      To reiterate, we try:
    705      Specific:
    706       language+country+variant
    707       language+country
    708       language
    709      Default:
    710       language+country+variant
    711       language+country
    712       language
    713      Root: (aka DEFAULTRULES)
    714      steps 1-5 are handled by resource bundle fallback mechanism.
    715      however, in a very unprobable situation that no resource bundle
    716      data exists, step 5 is repeated with hardcoded default rules.
    717     */
    718 
    719     setUCollator(desiredLocale, status);
    720 
    721     if (U_FAILURE(status))
    722     {
    723         status = U_ZERO_ERROR;
    724 
    725         setUCollator(kRootLocaleName, status);
    726         if (status == U_ZERO_ERROR) {
    727             status = U_USING_DEFAULT_WARNING;
    728         }
    729     }
    730 
    731     if (U_SUCCESS(status))
    732     {
    733         setRuleStringFromCollator();
    734     }
    735 }
    736 
    737 void
    738 RuleBasedCollator::setUCollator(const char *locale,
    739                                 UErrorCode &status)
    740 {
    741     if (U_FAILURE(status)) {
    742         return;
    743     }
    744     if (ucollator && dataIsOwned)
    745         ucol_close(ucollator);
    746     ucollator = ucol_open_internal(locale, &status);
    747     dataIsOwned = TRUE;
    748     isWriteThroughAlias = FALSE;
    749 }
    750 
    751 
    752 void
    753 RuleBasedCollator::checkOwned() {
    754     if (!(dataIsOwned || isWriteThroughAlias)) {
    755         UErrorCode status = U_ZERO_ERROR;
    756         ucollator = ucol_safeClone(ucollator, NULL, NULL, &status);
    757         setRuleStringFromCollator();
    758         dataIsOwned = TRUE;
    759         isWriteThroughAlias = FALSE;
    760     }
    761 }
    762 
// RTTI boilerplate for RuleBasedCollator; the macro is defined outside this
// file (presumably it supplies the class-ID functions -- TODO confirm).
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
    764 
    765 U_NAMESPACE_END
    766 
    767 #endif /* #if !UCONFIG_NO_COLLATION */
    768