Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * rulebasedcollator.cpp
      7 *
      8 * (replaced the former tblcoll.cpp)
      9 *
     10 * created on: 2012feb14 with new and old collation code
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_COLLATION
     17 
     18 #include "unicode/coll.h"
     19 #include "unicode/coleitr.h"
     20 #include "unicode/localpointer.h"
     21 #include "unicode/locid.h"
     22 #include "unicode/sortkey.h"
     23 #include "unicode/tblcoll.h"
     24 #include "unicode/ucol.h"
     25 #include "unicode/uiter.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/uniset.h"
     28 #include "unicode/unistr.h"
     29 #include "unicode/usetiter.h"
     30 #include "unicode/utf8.h"
     31 #include "unicode/uversion.h"
     32 #include "bocsu.h"
     33 #include "charstr.h"
     34 #include "cmemory.h"
     35 #include "collation.h"
     36 #include "collationcompare.h"
     37 #include "collationdata.h"
     38 #include "collationdatareader.h"
     39 #include "collationfastlatin.h"
     40 #include "collationiterator.h"
     41 #include "collationkeys.h"
     42 #include "collationroot.h"
     43 #include "collationsets.h"
     44 #include "collationsettings.h"
     45 #include "collationtailoring.h"
     46 #include "cstring.h"
     47 #include "uassert.h"
     48 #include "ucol_imp.h"
     49 #include "uhash.h"
     50 #include "uitercollationiterator.h"
     51 #include "ustr_imp.h"
     52 #include "utf16collationiterator.h"
     53 #include "utf8collationiterator.h"
     54 #include "uvectr64.h"
     55 
     56 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     57 
     58 U_NAMESPACE_BEGIN
     59 
     60 namespace {
     61 
     62 class FixedSortKeyByteSink : public SortKeyByteSink {
     63 public:
     64     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
     65             : SortKeyByteSink(dest, destCapacity) {}
     66     virtual ~FixedSortKeyByteSink();
     67 
     68 private:
     69     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
     70     virtual UBool Resize(int32_t appendCapacity, int32_t length);
     71 };
     72 
     73 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
     74 
     75 void
     76 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
     77     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
     78     // Fill the buffer completely.
     79     int32_t available = capacity_ - length;
     80     if (available > 0) {
     81         uprv_memcpy(buffer_ + length, bytes, available);
     82     }
     83 }
     84 
     85 UBool
     86 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
     87     return FALSE;
     88 }
     89 
     90 }  // namespace
     91 
     92 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
     93 class CollationKeyByteSink : public SortKeyByteSink {
     94 public:
     95     CollationKeyByteSink(CollationKey &key)
     96             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
     97               key_(key) {}
     98     virtual ~CollationKeyByteSink();
     99 
    100 private:
    101     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
    102     virtual UBool Resize(int32_t appendCapacity, int32_t length);
    103 
    104     CollationKey &key_;
    105 };
    106 
    107 CollationKeyByteSink::~CollationKeyByteSink() {}
    108 
    109 void
    110 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
    111     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
    112     if (Resize(n, length)) {
    113         uprv_memcpy(buffer_ + length, bytes, n);
    114     }
    115 }
    116 
    117 UBool
    118 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
    119     if (buffer_ == NULL) {
    120         return FALSE;  // allocation failed before already
    121     }
    122     int32_t newCapacity = 2 * capacity_;
    123     int32_t altCapacity = length + 2 * appendCapacity;
    124     if (newCapacity < altCapacity) {
    125         newCapacity = altCapacity;
    126     }
    127     if (newCapacity < 200) {
    128         newCapacity = 200;
    129     }
    130     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
    131     if (newBuffer == NULL) {
    132         SetNotOk();
    133         return FALSE;
    134     }
    135     buffer_ = reinterpret_cast<char *>(newBuffer);
    136     capacity_ = newCapacity;
    137     return TRUE;
    138 }
    139 
    140 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
    141         : Collator(other),
    142           data(other.data),
    143           settings(other.settings),
    144           tailoring(other.tailoring),
    145           validLocale(other.validLocale),
    146           explicitlySetAttributes(other.explicitlySetAttributes),
    147           actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    148     settings->addRef();
    149     tailoring->addRef();
    150 }
    151 
    152 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
    153                                      const RuleBasedCollator *base, UErrorCode &errorCode)
    154         : data(NULL),
    155           settings(NULL),
    156           tailoring(NULL),
    157           validLocale(""),
    158           explicitlySetAttributes(0),
    159           actualLocaleIsSameAsValid(FALSE) {
    160     if(U_FAILURE(errorCode)) { return; }
    161     if(bin == NULL || length <= 0 || base == NULL) {
    162         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    163         return;
    164     }
    165     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    166     if(U_FAILURE(errorCode)) { return; }
    167     if(base->tailoring != root) {
    168         errorCode = U_UNSUPPORTED_ERROR;
    169         return;
    170     }
    171     LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
    172     if(t.isNull() || t->isBogus()) {
    173         errorCode = U_MEMORY_ALLOCATION_ERROR;
    174         return;
    175     }
    176     CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
    177     if(U_FAILURE(errorCode)) { return; }
    178     t->actualLocale.setToBogus();
    179     adoptTailoring(t.orphan());
    180 }
    181 
    182 RuleBasedCollator::RuleBasedCollator(const CollationTailoring *t, const Locale &vl)
    183         : data(t->data),
    184           settings(t->settings),
    185           tailoring(t),
    186           validLocale(vl),
    187           explicitlySetAttributes(0),
    188           actualLocaleIsSameAsValid(FALSE) {
    189     settings->addRef();
    190     tailoring->addRef();
    191 }
    192 
    193 RuleBasedCollator::~RuleBasedCollator() {
    194     SharedObject::clearPtr(settings);
    195     SharedObject::clearPtr(tailoring);
    196 }
    197 
    198 void
    199 RuleBasedCollator::adoptTailoring(CollationTailoring *t) {
    200     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL);
    201     data = t->data;
    202     settings = t->settings;
    203     settings->addRef();
    204     t->addRef();
    205     tailoring = t;
    206     validLocale = t->actualLocale;
    207     actualLocaleIsSameAsValid = FALSE;
    208 }
    209 
    210 Collator *
    211 RuleBasedCollator::clone() const {
    212     return new RuleBasedCollator(*this);
    213 }
    214 
    215 RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    216     if(this == &other) { return *this; }
    217     SharedObject::copyPtr(other.settings, settings);
    218     SharedObject::copyPtr(other.tailoring, tailoring);
    219     data = tailoring->data;
    220     validLocale = other.validLocale;
    221     explicitlySetAttributes = other.explicitlySetAttributes;
    222     actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    223     return *this;
    224 }
    225 
    226 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
    227 
    228 UBool
    229 RuleBasedCollator::operator==(const Collator& other) const {
    230     if(this == &other) { return TRUE; }
    231     if(!Collator::operator==(other)) { return FALSE; }
    232     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
    233     if(*settings != *o.settings) { return FALSE; }
    234     if(data == o.data) { return TRUE; }
    235     UBool thisIsRoot = data->base == NULL;
    236     UBool otherIsRoot = o.data->base == NULL;
    237     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
    238     if(thisIsRoot != otherIsRoot) { return FALSE; }
    239     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
    240             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
    241         // Shortcut: If both collators have valid rule strings, then compare those.
    242         if(tailoring->rules == o.tailoring->rules) { return TRUE; }
    243     }
    244     // Different rule strings can result in the same or equivalent tailoring.
    245     // The rule strings are optional in ICU resource bundles, although included by default.
    246     // cloneBinary() drops the rule string.
    247     UErrorCode errorCode = U_ZERO_ERROR;
    248     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
    249     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
    250     if(U_FAILURE(errorCode)) { return FALSE; }
    251     if(*thisTailored != *otherTailored) { return FALSE; }
    252     // For completeness, we should compare all of the mappings;
    253     // or we should create a list of strings, sort it with one collator,
    254     // and check if both collators compare adjacent strings the same
    255     // (order & strength, down to quaternary); or similar.
    256     // Testing equality of collators seems unusual.
    257     return TRUE;
    258 }
    259 
    260 int32_t
    261 RuleBasedCollator::hashCode() const {
    262     int32_t h = settings->hashCode();
    263     if(data->base == NULL) { return h; }  // root collator
    264     // Do not rely on the rule string, see comments in operator==().
    265     UErrorCode errorCode = U_ZERO_ERROR;
    266     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
    267     if(U_FAILURE(errorCode)) { return 0; }
    268     UnicodeSetIterator iter(*set);
    269     while(iter.next() && !iter.isString()) {
    270         h ^= data->getCE32(iter.getCodepoint());
    271     }
    272     return h;
    273 }
    274 
    275 void
    276 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
    277                               const Locale &actual) {
    278     if(actual == tailoring->actualLocale) {
    279         actualLocaleIsSameAsValid = FALSE;
    280     } else {
    281         U_ASSERT(actual == valid);
    282         actualLocaleIsSameAsValid = TRUE;
    283     }
    284     // Do not modify tailoring.actualLocale:
    285     // We cannot be sure that that would be thread-safe.
    286     validLocale = valid;
    287     (void)requested;  // Ignore, see also ticket #10477.
    288 }
    289 
    290 Locale
    291 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
    292     if(U_FAILURE(errorCode)) {
    293         return Locale::getRoot();
    294     }
    295     switch(type) {
    296     case ULOC_ACTUAL_LOCALE:
    297         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
    298     case ULOC_VALID_LOCALE:
    299     case ULOC_REQUESTED_LOCALE:  // TODO: Drop this, see ticket #10477.
    300         return validLocale;
    301     default:
    302         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    303         return Locale::getRoot();
    304     }
    305 }
    306 
    307 const char *
    308 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
    309     if(U_FAILURE(errorCode)) {
    310         return NULL;
    311     }
    312     const Locale *result;
    313     switch(type) {
    314     case ULOC_ACTUAL_LOCALE:
    315         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
    316         break;
    317     case ULOC_VALID_LOCALE:
    318     case ULOC_REQUESTED_LOCALE:  // TODO: Drop this, see ticket #10477.
    319         result = &validLocale;
    320         break;
    321     default:
    322         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    323         return NULL;
    324     }
    325     if(result->isBogus()) { return NULL; }
    326     const char *id = result->getName();
    327     return id[0] == 0 ? "root" : id;
    328 }
    329 
    330 const UnicodeString&
    331 RuleBasedCollator::getRules() const {
    332     return tailoring->rules;
    333 }
    334 
    335 void
    336 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
    337     if(delta == UCOL_TAILORING_ONLY) {
    338         buffer = tailoring->rules;
    339         return;
    340     }
    341     // UCOL_FULL_RULES
    342     buffer.remove();
    343     CollationLoader::appendRootRules(buffer);
    344     buffer.append(tailoring->rules).getTerminatedBuffer();
    345 }
    346 
    347 void
    348 RuleBasedCollator::getVersion(UVersionInfo version) const {
    349     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
    350     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
    351 }
    352 
    353 UnicodeSet *
    354 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
    355     if(U_FAILURE(errorCode)) { return NULL; }
    356     UnicodeSet *tailored = new UnicodeSet();
    357     if(tailored == NULL) {
    358         errorCode = U_MEMORY_ALLOCATION_ERROR;
    359         return NULL;
    360     }
    361     if(data->base != NULL) {
    362         TailoredSet(tailored).forData(data, errorCode);
    363         if(U_FAILURE(errorCode)) {
    364             delete tailored;
    365             return NULL;
    366         }
    367     }
    368     return tailored;
    369 }
    370 
    371 void
    372 RuleBasedCollator::internalGetContractionsAndExpansions(
    373         UnicodeSet *contractions, UnicodeSet *expansions,
    374         UBool addPrefixes, UErrorCode &errorCode) const {
    375     if(U_FAILURE(errorCode)) { return; }
    376     if(contractions != NULL) {
    377         contractions->clear();
    378     }
    379     if(expansions != NULL) {
    380         expansions->clear();
    381     }
    382     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
    383 }
    384 
    385 void
    386 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
    387     if(U_FAILURE(errorCode)) { return; }
    388     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
    389 }
    390 
    391 const CollationSettings &
    392 RuleBasedCollator::getDefaultSettings() const {
    393     return *tailoring->settings;
    394 }
    395 
    396 UColAttributeValue
    397 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
    398     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
    399     int32_t option;
    400     switch(attr) {
    401     case UCOL_FRENCH_COLLATION:
    402         option = CollationSettings::BACKWARD_SECONDARY;
    403         break;
    404     case UCOL_ALTERNATE_HANDLING:
    405         return settings->getAlternateHandling();
    406     case UCOL_CASE_FIRST:
    407         return settings->getCaseFirst();
    408     case UCOL_CASE_LEVEL:
    409         option = CollationSettings::CASE_LEVEL;
    410         break;
    411     case UCOL_NORMALIZATION_MODE:
    412         option = CollationSettings::CHECK_FCD;
    413         break;
    414     case UCOL_STRENGTH:
    415         return (UColAttributeValue)settings->getStrength();
    416     case UCOL_HIRAGANA_QUATERNARY_MODE:
    417         // Deprecated attribute, unsettable.
    418         return UCOL_OFF;
    419     case UCOL_NUMERIC_COLLATION:
    420         option = CollationSettings::NUMERIC;
    421         break;
    422     default:
    423         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    424         return UCOL_DEFAULT;
    425     }
    426     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
    427 }
    428 
    429 void
    430 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
    431                                 UErrorCode &errorCode) {
    432     UColAttributeValue oldValue = getAttribute(attr, errorCode);
    433     if(U_FAILURE(errorCode)) { return; }
    434     if(value == oldValue) {
    435         setAttributeExplicitly(attr);
    436         return;
    437     }
    438     const CollationSettings &defaultSettings = getDefaultSettings();
    439     if(settings == &defaultSettings) {
    440         if(value == UCOL_DEFAULT) {
    441             setAttributeDefault(attr);
    442             return;
    443         }
    444     }
    445     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    446     if(ownedSettings == NULL) {
    447         errorCode = U_MEMORY_ALLOCATION_ERROR;
    448         return;
    449     }
    450 
    451     switch(attr) {
    452     case UCOL_FRENCH_COLLATION:
    453         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
    454                                defaultSettings.options, errorCode);
    455         break;
    456     case UCOL_ALTERNATE_HANDLING:
    457         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
    458         break;
    459     case UCOL_CASE_FIRST:
    460         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
    461         break;
    462     case UCOL_CASE_LEVEL:
    463         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
    464                                defaultSettings.options, errorCode);
    465         break;
    466     case UCOL_NORMALIZATION_MODE:
    467         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
    468                                defaultSettings.options, errorCode);
    469         break;
    470     case UCOL_STRENGTH:
    471         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
    472         break;
    473     case UCOL_HIRAGANA_QUATERNARY_MODE:
    474         // Deprecated attribute. Check for valid values but do not change anything.
    475         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
    476             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    477         }
    478         break;
    479     case UCOL_NUMERIC_COLLATION:
    480         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
    481         break;
    482     default:
    483         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    484         break;
    485     }
    486     if(U_FAILURE(errorCode)) { return; }
    487     setFastLatinOptions(*ownedSettings);
    488     if(value == UCOL_DEFAULT) {
    489         setAttributeDefault(attr);
    490     } else {
    491         setAttributeExplicitly(attr);
    492     }
    493 }
    494 
    495 Collator &
    496 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
    497     if(U_FAILURE(errorCode)) { return *this; }
    498     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    499     int32_t value;
    500     if(group == UCOL_REORDER_CODE_DEFAULT) {
    501         value = UCOL_DEFAULT;
    502     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
    503         value = group - UCOL_REORDER_CODE_FIRST;
    504     } else {
    505         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    506         return *this;
    507     }
    508     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
    509     if(value == oldValue) {
    510         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    511         return *this;
    512     }
    513     const CollationSettings &defaultSettings = getDefaultSettings();
    514     if(settings == &defaultSettings) {
    515         if(value == UCOL_DEFAULT) {
    516             setAttributeDefault(ATTR_VARIABLE_TOP);
    517             return *this;
    518         }
    519     }
    520     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    521     if(ownedSettings == NULL) {
    522         errorCode = U_MEMORY_ALLOCATION_ERROR;
    523         return *this;
    524     }
    525 
    526     if(group == UCOL_REORDER_CODE_DEFAULT) {
    527         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
    528     }
    529     uint32_t varTop = data->getLastPrimaryForGroup(group);
    530     U_ASSERT(varTop != 0);
    531     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
    532     if(U_FAILURE(errorCode)) { return *this; }
    533     ownedSettings->variableTop = varTop;
    534     setFastLatinOptions(*ownedSettings);
    535     if(value == UCOL_DEFAULT) {
    536         setAttributeDefault(ATTR_VARIABLE_TOP);
    537     } else {
    538         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    539     }
    540     return *this;
    541 }
    542 
    543 UColReorderCode
    544 RuleBasedCollator::getMaxVariable() const {
    545     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
    546 }
    547 
    548 uint32_t
    549 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
    550     return settings->variableTop;
    551 }
    552 
    553 uint32_t
    554 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
    555     if(U_FAILURE(errorCode)) { return 0; }
    556     if(varTop == NULL && len !=0) {
    557         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    558         return 0;
    559     }
    560     if(len < 0) { len = u_strlen(varTop); }
    561     if(len == 0) {
    562         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    563         return 0;
    564     }
    565     UBool numeric = settings->isNumeric();
    566     int64_t ce1, ce2;
    567     if(settings->dontCheckFCD()) {
    568         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    569         ce1 = ci.nextCE(errorCode);
    570         ce2 = ci.nextCE(errorCode);
    571     } else {
    572         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    573         ce1 = ci.nextCE(errorCode);
    574         ce2 = ci.nextCE(errorCode);
    575     }
    576     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
    577         errorCode = U_CE_NOT_FOUND_ERROR;
    578         return 0;
    579     }
    580     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
    581     return settings->variableTop;
    582 }
    583 
    584 uint32_t
    585 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
    586     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
    587 }
    588 
    589 void
    590 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
    591     if(U_FAILURE(errorCode)) { return; }
    592     if(varTop != settings->variableTop) {
    593         // Pin the variable top to the end of the reordering group which contains it.
    594         // Only a few special groups are supported.
    595         int32_t group = data->getGroupForPrimary(varTop);
    596         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
    597             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    598             return;
    599         }
    600         uint32_t v = data->getLastPrimaryForGroup(group);
    601         U_ASSERT(v != 0 && v >= varTop);
    602         varTop = v;
    603         if(varTop != settings->variableTop) {
    604             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    605             if(ownedSettings == NULL) {
    606                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    607                 return;
    608             }
    609             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
    610                                           getDefaultSettings().options, errorCode);
    611             if(U_FAILURE(errorCode)) { return; }
    612             ownedSettings->variableTop = varTop;
    613             setFastLatinOptions(*ownedSettings);
    614         }
    615     }
    616     if(varTop == getDefaultSettings().variableTop) {
    617         setAttributeDefault(ATTR_VARIABLE_TOP);
    618     } else {
    619         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    620     }
    621 }
    622 
    623 int32_t
    624 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
    625                                    UErrorCode &errorCode) const {
    626     if(U_FAILURE(errorCode)) { return 0; }
    627     if(capacity < 0 || (dest == NULL && capacity > 0)) {
    628         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    629         return 0;
    630     }
    631     int32_t length = settings->reorderCodesLength;
    632     if(length == 0) { return 0; }
    633     if(length > capacity) {
    634         errorCode = U_BUFFER_OVERFLOW_ERROR;
    635         return length;
    636     }
    637     uprv_memcpy(dest, settings->reorderCodes, length * 4);
    638     return length;
    639 }
    640 
    641 void
    642 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
    643                                    UErrorCode &errorCode) {
    644     if(U_FAILURE(errorCode)) { return; }
    645     if(length < 0 || (reorderCodes == NULL && length > 0)) {
    646         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    647         return;
    648     }
    649     if(length == settings->reorderCodesLength &&
    650             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
    651         return;
    652     }
    653     const CollationSettings &defaultSettings = getDefaultSettings();
    654     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
    655         if(settings != &defaultSettings) {
    656             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    657             if(ownedSettings == NULL) {
    658                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    659                 return;
    660             }
    661             ownedSettings->aliasReordering(defaultSettings.reorderCodes,
    662                                            defaultSettings.reorderCodesLength,
    663                                            defaultSettings.reorderTable);
    664             setFastLatinOptions(*ownedSettings);
    665         }
    666         return;
    667     }
    668     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    669     if(ownedSettings == NULL) {
    670         errorCode = U_MEMORY_ALLOCATION_ERROR;
    671         return;
    672     }
    673     if(length == 0) {
    674         ownedSettings->resetReordering();
    675     } else {
    676         uint8_t reorderTable[256];
    677         data->makeReorderTable(reorderCodes, length, reorderTable, errorCode);
    678         if(U_FAILURE(errorCode)) { return; }
    679         if(!ownedSettings->setReordering(reorderCodes, length, reorderTable)) {
    680             errorCode = U_MEMORY_ALLOCATION_ERROR;
    681             return;
    682         }
    683     }
    684     setFastLatinOptions(*ownedSettings);
    685 }
    686 
    687 void
    688 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
    689     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
    690             data, ownedSettings,
    691             ownedSettings.fastLatinPrimaries, LENGTHOF(ownedSettings.fastLatinPrimaries));
    692 }
    693 
    694 UCollationResult
    695 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    696                            UErrorCode &errorCode) const {
    697     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    698     return doCompare(left.getBuffer(), left.length(),
    699                      right.getBuffer(), right.length(), errorCode);
    700 }
    701 
    702 UCollationResult
    703 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    704                            int32_t length, UErrorCode &errorCode) const {
    705     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
    706     if(length < 0) {
    707         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    708         return UCOL_EQUAL;
    709     }
    710     int32_t leftLength = left.length();
    711     int32_t rightLength = right.length();
    712     if(leftLength > length) { leftLength = length; }
    713     if(rightLength > length) { rightLength = length; }
    714     return doCompare(left.getBuffer(), leftLength,
    715                      right.getBuffer(), rightLength, errorCode);
    716 }
    717 
    718 UCollationResult
    719 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
    720                            const UChar *right, int32_t rightLength,
    721                            UErrorCode &errorCode) const {
    722     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    723     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
    724         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    725         return UCOL_EQUAL;
    726     }
    727     // Make sure both or neither strings have a known length.
    728     // We do not optimize for mixed length/termination.
    729     if(leftLength >= 0) {
    730         if(rightLength < 0) { rightLength = u_strlen(right); }
    731     } else {
    732         if(rightLength >= 0) { leftLength = u_strlen(left); }
    733     }
    734     return doCompare(left, leftLength, right, rightLength, errorCode);
    735 }
    736 
    737 UCollationResult
    738 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
    739                                UErrorCode &errorCode) const {
    740     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    741     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
    742     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
    743     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
    744         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    745         return UCOL_EQUAL;
    746     }
    747     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
    748 }
    749 
    750 UCollationResult
    751 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
    752                                        const char *right, int32_t rightLength,
    753                                        UErrorCode &errorCode) const {
    754     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    755     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
    756         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    757         return UCOL_EQUAL;
    758     }
    759     // Make sure both or neither strings have a known length.
    760     // We do not optimize for mixed length/termination.
    761     if(leftLength >= 0) {
    762         if(rightLength < 0) { rightLength = uprv_strlen(right); }
    763     } else {
    764         if(rightLength >= 0) { leftLength = uprv_strlen(left); }
    765     }
    766     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
    767                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
    768 }
    769 
    770 namespace {
    771 
    772 /**
    773  * Abstract iterator for identical-level string comparisons.
    774  * Returns FCD code points and handles temporary switching to NFD.
    775  */
    776 class NFDIterator {
    777 public:
    778     NFDIterator() : index(-1), length(0) {}
    779     virtual ~NFDIterator() {}
    780     /**
    781      * Returns the next code point from the internal normalization buffer,
    782      * or else the next text code point.
    783      * Returns -1 at the end of the text.
    784      */
    785     UChar32 nextCodePoint() {
    786         if(index >= 0) {
    787             if(index == length) {
    788                 index = -1;
    789             } else {
    790                 UChar32 c;
    791                 U16_NEXT_UNSAFE(decomp, index, c);
    792                 return c;
    793             }
    794         }
    795         return nextRawCodePoint();
    796     }
    797     /**
    798      * @param nfcImpl
    799      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
    800      * @return the first code point in c's decomposition,
    801      *         or c itself if it was decomposed already or if it does not decompose
    802      */
    803     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
    804         if(index >= 0) { return c; }
    805         decomp = nfcImpl.getDecomposition(c, buffer, length);
    806         if(decomp == NULL) { return c; }
    807         index = 0;
    808         U16_NEXT_UNSAFE(decomp, index, c);
    809         return c;
    810     }
    811 protected:
    812     /**
    813      * Returns the next text code point in FCD order.
    814      * Returns -1 at the end of the text.
    815      */
    816     virtual UChar32 nextRawCodePoint() = 0;
    817 private:
    818     const UChar *decomp;
    819     UChar buffer[4];
    820     int32_t index;
    821     int32_t length;
    822 };
    823 
    824 class UTF16NFDIterator : public NFDIterator {
    825 public:
    826     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
    827 protected:
    828     virtual UChar32 nextRawCodePoint() {
    829         if(s == limit) { return U_SENTINEL; }
    830         UChar32 c = *s++;
    831         if(limit == NULL && c == 0) {
    832             s = NULL;
    833             return U_SENTINEL;
    834         }
    835         UChar trail;
    836         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
    837             ++s;
    838             c = U16_GET_SUPPLEMENTARY(c, trail);
    839         }
    840         return c;
    841     }
    842 
    843     const UChar *s;
    844     const UChar *limit;
    845 };
    846 
    847 class FCDUTF16NFDIterator : public UTF16NFDIterator {
    848 public:
    849     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
    850             : UTF16NFDIterator(NULL, NULL) {
    851         UErrorCode errorCode = U_ZERO_ERROR;
    852         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
    853         if(U_FAILURE(errorCode)) { return; }
    854         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
    855             s = text;
    856             limit = spanLimit;
    857         } else {
    858             str.setTo(text, (int32_t)(spanLimit - text));
    859             {
    860                 ReorderingBuffer buffer(nfcImpl, str);
    861                 if(buffer.init(str.length(), errorCode)) {
    862                     nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
    863                 }
    864             }
    865             if(U_SUCCESS(errorCode)) {
    866                 s = str.getBuffer();
    867                 limit = s + str.length();
    868             }
    869         }
    870     }
    871 private:
    872     UnicodeString str;
    873 };
    874 
    875 class UTF8NFDIterator : public NFDIterator {
    876 public:
    877     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
    878         : s(text), pos(0), length(textLength) {}
    879 protected:
    880     virtual UChar32 nextRawCodePoint() {
    881         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
    882         UChar32 c;
    883         U8_NEXT_OR_FFFD(s, pos, length, c);
    884         return c;
    885     }
    886 
    887     const uint8_t *s;
    888     int32_t pos;
    889     int32_t length;
    890 };
    891 
    892 class FCDUTF8NFDIterator : public NFDIterator {
    893 public:
    894     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
    895             : u8ci(data, FALSE, text, 0, textLength) {}
    896 protected:
    897     virtual UChar32 nextRawCodePoint() {
    898         UErrorCode errorCode = U_ZERO_ERROR;
    899         return u8ci.nextCodePoint(errorCode);
    900     }
    901 private:
    902     FCDUTF8CollationIterator u8ci;
    903 };
    904 
    905 class UIterNFDIterator : public NFDIterator {
    906 public:
    907     UIterNFDIterator(UCharIterator &it) : iter(it) {}
    908 protected:
    909     virtual UChar32 nextRawCodePoint() {
    910         return uiter_next32(&iter);
    911     }
    912 private:
    913     UCharIterator &iter;
    914 };
    915 
    916 class FCDUIterNFDIterator : public NFDIterator {
    917 public:
    918     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
    919             : uici(data, FALSE, it, startIndex) {}
    920 protected:
    921     virtual UChar32 nextRawCodePoint() {
    922         UErrorCode errorCode = U_ZERO_ERROR;
    923         return uici.nextCodePoint(errorCode);
    924     }
    925 private:
    926     FCDUIterCollationIterator uici;
    927 };
    928 
    929 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
    930                                 NFDIterator &left, NFDIterator &right) {
    931     for(;;) {
    932         // Fetch the next FCD code point from each string.
    933         UChar32 leftCp = left.nextCodePoint();
    934         UChar32 rightCp = right.nextCodePoint();
    935         if(leftCp == rightCp) {
    936             if(leftCp < 0) { break; }
    937             continue;
    938         }
    939         // If they are different, then decompose each and compare again.
    940         if(leftCp < 0) {
    941             leftCp = -2;  // end of string
    942         } else if(leftCp == 0xfffe) {
    943             leftCp = -1;  // U+FFFE: merge separator
    944         } else {
    945             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
    946         }
    947         if(rightCp < 0) {
    948             rightCp = -2;  // end of string
    949         } else if(rightCp == 0xfffe) {
    950             rightCp = -1;  // U+FFFE: merge separator
    951         } else {
    952             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
    953         }
    954         if(leftCp < rightCp) { return UCOL_LESS; }
    955         if(leftCp > rightCp) { return UCOL_GREATER; }
    956     }
    957     return UCOL_EQUAL;
    958 }
    959 
    960 }  // namespace
    961 
    962 UCollationResult
    963 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
    964                              const UChar *right, int32_t rightLength,
    965                              UErrorCode &errorCode) const {
            // Core UTF-16 comparison. The strings are handled either as both
            // length-delimited (leftLength >= 0) or as both NUL-terminated (leftLength < 0).
            // Order of work: skip the code units that are identical in both strings,
            // try CollationFastLatin, fall back to full collation iterators, and,
            // for UCOL_IDENTICAL strength, break remaining ties with compareNFDIter().
    966     // U_FAILURE(errorCode) checked by caller.
            // Same pointer and same length: nothing to compare.
    967     if(left == right && leftLength == rightLength) {
    968         return UCOL_EQUAL;
    969     }
    970 
    971     // Identical-prefix test.
    972     const UChar *leftLimit;
    973     const UChar *rightLimit;
    974     int32_t equalPrefixLength = 0;
    975     if(leftLength < 0) {
                // NUL-terminated input: both limits are NULL, and reaching a NUL
                // that both strings share means that they are identical.
    976         leftLimit = NULL;
    977         rightLimit = NULL;
    978         UChar c;
    979         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
    980             if(c == 0) { return UCOL_EQUAL; }
    981             ++equalPrefixLength;
    982         }
    983     } else {
                // Length-delimited input: stop at the end of either string
                // or at the first differing code unit.
    984         leftLimit = left + leftLength;
    985         rightLimit = right + rightLength;
    986         for(;;) {
    987             if(equalPrefixLength == leftLength) {
    988                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
    989                 break;
    990             } else if(equalPrefixLength == rightLength ||
    991                       left[equalPrefixLength] != right[equalPrefixLength]) {
    992                 break;
    993             }
    994             ++equalPrefixLength;
    995         }
    996     }
    997 
    998     UBool numeric = settings->isNumeric();
    999     if(equalPrefixLength > 0) {
                // Back up only if the first unit after the prefix, in either string,
                // is flagged by isUnsafeBackward().
   1000         if((equalPrefixLength != leftLength &&
   1001                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
   1002                 (equalPrefixLength != rightLength &&
   1003                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
   1004             // Identical prefix: Back up to the start of a contraction or reordering sequence.
                    // Both strings are identical throughout the prefix,
                    // so looking at left alone is sufficient here.
   1005             while(--equalPrefixLength > 0 &&
   1006                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
   1007         }
   1008         // Notes:
   1009         // - A longer string can compare equal to a prefix of it if only ignorables follow.
   1010         // - With a backward level, a longer string can compare less-than a prefix of it.
   1011 
   1012         // Pass the actual start of each string into the CollationIterators,
   1013         // plus the equalPrefixLength position,
   1014         // so that prefix matches back into the equal prefix work.
   1015     }
   1016 
            // Fast path: attempted only if fastLatinOptions is non-negative and each string
            // is either exhausted or continues with a code unit no greater than LATIN_MAX.
   1017     int32_t result;
   1018     int32_t fastLatinOptions = settings->fastLatinOptions;
   1019     if(fastLatinOptions >= 0 &&
   1020             (equalPrefixLength == leftLength ||
   1021                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
   1022             (equalPrefixLength == rightLength ||
   1023                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
   1024         if(leftLength >= 0) {
   1025             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
   1026                                                       settings->fastLatinPrimaries,
   1027                                                       fastLatinOptions,
   1028                                                       left + equalPrefixLength,
   1029                                                       leftLength - equalPrefixLength,
   1030                                                       right + equalPrefixLength,
   1031                                                       rightLength - equalPrefixLength);
   1032         } else {
                    // NUL-terminated: pass -1 for both lengths.
   1033             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
   1034                                                       settings->fastLatinPrimaries,
   1035                                                       fastLatinOptions,
   1036                                                       left + equalPrefixLength, -1,
   1037                                                       right + equalPrefixLength, -1);
   1038         }
   1039     } else {
   1040         result = CollationFastLatin::BAIL_OUT_RESULT;
   1041     }
   1042 
            // The fast path was either skipped or returned BAIL_OUT_RESULT:
            // run the general comparison, with or without FCD checking per the settings.
   1043     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
   1044         if(settings->dontCheckFCD()) {
   1045             UTF16CollationIterator leftIter(data, numeric,
   1046                                             left, left + equalPrefixLength, leftLimit);
   1047             UTF16CollationIterator rightIter(data, numeric,
   1048                                             right, right + equalPrefixLength, rightLimit);
   1049             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1050         } else {
   1051             FCDUTF16CollationIterator leftIter(data, numeric,
   1052                                               left, left + equalPrefixLength, leftLimit);
   1053             FCDUTF16CollationIterator rightIter(data, numeric,
   1054                                                 right, right + equalPrefixLength, rightLimit);
   1055             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1056         }
   1057     }
            // Finished unless the strings tied, the strength is at least UCOL_IDENTICAL,
            // and no error occurred.
   1058     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1059         return (UCollationResult)result;
   1060     }
   1061 
   1062     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
   1063     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
   1064     // and the benefit seems unlikely to be measurable.
   1065 
   1066     // Compare identical level.
            // Start after the (possibly backed-up) equal prefix; the limits stay unchanged.
   1067     const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1068     left += equalPrefixLength;
   1069     right += equalPrefixLength;
   1070     if(settings->dontCheckFCD()) {
   1071         UTF16NFDIterator leftIter(left, leftLimit);
   1072         UTF16NFDIterator rightIter(right, rightLimit);
   1073         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1074     } else {
   1075         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
   1076         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
   1077         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1078     }
   1079 }
   1080 
   1081 UCollationResult
   1082 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
   1083                              const uint8_t *right, int32_t rightLength,
   1084                              UErrorCode &errorCode) const {
            // Core UTF-8 comparison. The strings are handled either as both
            // length-delimited (leftLength >= 0) or as both NUL-terminated (leftLength < 0).
            // Order of work: skip the bytes that are identical in both strings,
            // try CollationFastLatin, fall back to full collation iterators, and,
            // for UCOL_IDENTICAL strength, break remaining ties with compareNFDIter().
   1085     // U_FAILURE(errorCode) checked by caller.
            // Same pointer and same length: nothing to compare.
   1086     if(left == right && leftLength == rightLength) {
   1087         return UCOL_EQUAL;
   1088     }
   1089 
   1090     // Identical-prefix test.
   1091     int32_t equalPrefixLength = 0;
   1092     if(leftLength < 0) {
                // NUL-terminated input: reaching a NUL that both strings share
                // means that they are identical.
   1093         uint8_t c;
   1094         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
   1095             if(c == 0) { return UCOL_EQUAL; }
   1096             ++equalPrefixLength;
   1097         }
   1098     } else {
                // Length-delimited input: stop at the end of either string
                // or at the first differing byte.
   1099         for(;;) {
   1100             if(equalPrefixLength == leftLength) {
   1101                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
   1102                 break;
   1103             } else if(equalPrefixLength == rightLength ||
   1104                       left[equalPrefixLength] != right[equalPrefixLength]) {
   1105                 break;
   1106             }
   1107             ++equalPrefixLength;
   1108         }
   1109     }
   1110     // Back up to the start of a partially-equal code point.
            // This applies when the first byte after the prefix is a trail byte in either
            // string. The bytes before it are the same in both strings, so only left is scanned.
   1111     if(equalPrefixLength > 0 &&
   1112             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
   1113             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
   1114         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
   1115     }
   1116 
   1117     UBool numeric = settings->isNumeric();
   1118     if(equalPrefixLength > 0) {
                // Decode the code point that follows the prefix in each string (if there
                // is one) and ask isUnsafeBackward() about it. The local index i keeps
                // equalPrefixLength itself untouched.
   1119         UBool unsafe = FALSE;
   1120         if(equalPrefixLength != leftLength) {
   1121             int32_t i = equalPrefixLength;
   1122             UChar32 c;
   1123             U8_NEXT_OR_FFFD(left, i, leftLength, c);
   1124             unsafe = data->isUnsafeBackward(c, numeric);
   1125         }
   1126         if(!unsafe && equalPrefixLength != rightLength) {
   1127             int32_t i = equalPrefixLength;
   1128             UChar32 c;
   1129             U8_NEXT_OR_FFFD(right, i, rightLength, c);
   1130             unsafe = data->isUnsafeBackward(c, numeric);
   1131         }
   1132         if(unsafe) {
   1133             // Identical prefix: Back up to the start of a contraction or reordering sequence.
                    // U8_PREV_OR_FFFD moves equalPrefixLength back by one whole code point.
   1134             UChar32 c;
   1135             do {
   1136                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
   1137             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
   1138         }
   1139         // See the notes in the UTF-16 version.
   1140 
   1141         // Pass the actual start of each string into the CollationIterators,
   1142         // plus the equalPrefixLength position,
   1143         // so that prefix matches back into the equal prefix work.
   1144     }
   1145 
            // Fast path: attempted only if fastLatinOptions is non-negative and each string
            // is either exhausted or continues with a byte no greater than LATIN_MAX_UTF8_LEAD.
   1146     int32_t result;
   1147     int32_t fastLatinOptions = settings->fastLatinOptions;
   1148     if(fastLatinOptions >= 0 &&
   1149             (equalPrefixLength == leftLength ||
   1150                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
   1151             (equalPrefixLength == rightLength ||
   1152                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
   1153         if(leftLength >= 0) {
   1154             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
   1155                                                      settings->fastLatinPrimaries,
   1156                                                      fastLatinOptions,
   1157                                                      left + equalPrefixLength,
   1158                                                      leftLength - equalPrefixLength,
   1159                                                      right + equalPrefixLength,
   1160                                                      rightLength - equalPrefixLength);
   1161         } else {
                    // NUL-terminated: pass -1 for both lengths.
   1162             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
   1163                                                      settings->fastLatinPrimaries,
   1164                                                      fastLatinOptions,
   1165                                                      left + equalPrefixLength, -1,
   1166                                                      right + equalPrefixLength, -1);
   1167         }
   1168     } else {
   1169         result = CollationFastLatin::BAIL_OUT_RESULT;
   1170     }
   1171 
            // The fast path was either skipped or returned BAIL_OUT_RESULT:
            // run the general comparison, with or without FCD checking per the settings.
   1172     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
   1173         if(settings->dontCheckFCD()) {
   1174             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
   1175             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
   1176             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1177         } else {
   1178             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
   1179             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
   1180             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1181         }
   1182     }
            // Finished unless the strings tied, the strength is at least UCOL_IDENTICAL,
            // and no error occurred.
   1183     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1184         return (UCollationResult)result;
   1185     }
   1186 
   1187     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
   1188     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
   1189     // and the benefit seems unlikely to be measurable.
   1190 
   1191     // Compare identical level.
            // Start after the (possibly backed-up) equal prefix.
   1192     const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1193     left += equalPrefixLength;
   1194     right += equalPrefixLength;
            // Shorten explicit lengths by the skipped prefix. Negative lengths
            // (NUL-terminated) are left alone, and with leftLength == 0 the prefix
            // is necessarily empty, so there is nothing to subtract.
   1195     if(leftLength > 0) {
   1196         leftLength -= equalPrefixLength;
   1197         rightLength -= equalPrefixLength;
   1198     }
   1199     if(settings->dontCheckFCD()) {
   1200         UTF8NFDIterator leftIter(left, leftLength);
   1201         UTF8NFDIterator rightIter(right, rightLength);
   1202         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1203     } else {
   1204         FCDUTF8NFDIterator leftIter(data, left, leftLength);
   1205         FCDUTF8NFDIterator rightIter(data, right, rightLength);
   1206         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1207     }
   1208 }
   1209 
   1210 UCollationResult
   1211 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
   1212                            UErrorCode &errorCode) const {
            // Compares the text delivered by two UCharIterators.
            // Returns UCOL_EQUAL right away if errorCode is already a failure
            // or if both arguments are the same iterator object.
            // NOTE(review): equalPrefixLength is counted from the iterators' current
            // positions but later used as an index relative to UITER_ZERO, so both
            // iterators are presumably expected to start at index 0 -- confirm with callers.
   1213     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
   1214     UBool numeric = settings->isNumeric();
   1215 
   1216     // Identical-prefix test.
   1217     int32_t equalPrefixLength = 0;
   1218     {
   1219         UChar32 leftUnit;
   1220         UChar32 rightUnit;
                // next() yields a negative value at the end of the text; if both iterators
                // end together without any difference, the strings are identical.
   1221         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
   1222             if(leftUnit < 0) { return UCOL_EQUAL; }
   1223             ++equalPrefixLength;
   1224         }
   1225 
   1226         // Back out the code units that differed, for the real collation comparison.
                // An iterator that reported its end (negative unit) is not moved back.
   1227         if(leftUnit >= 0) { left.previous(&left); }
   1228         if(rightUnit >= 0) { right.previous(&right); }
   1229 
   1230         if(equalPrefixLength > 0) {
   1231             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
   1232                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
   1233                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
                        // Both iterators step back together; only the unit from left is
                        // tested because the prefix is identical in both texts.
   1234                 do {
   1235                     --equalPrefixLength;
   1236                     leftUnit = left.previous(&left);
   1237                     right.previous(&right);
   1238                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
   1239             }
   1240             // See the notes in the UTF-16 version.
   1241         }
   1242     }
   1243 
            // Compare up to the quaternary level, with or without FCD checking per the settings.
            // Only the FCD iterators are given equalPrefixLength as their start index.
   1244     UCollationResult result;
   1245     if(settings->dontCheckFCD()) {
   1246         UIterCollationIterator leftIter(data, numeric, left);
   1247         UIterCollationIterator rightIter(data, numeric, right);
   1248         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1249     } else {
   1250         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
   1251         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
   1252         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1253     }
            // Finished unless the strings tied, the strength is at least UCOL_IDENTICAL,
            // and no error occurred.
   1254     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1255         return result;
   1256     }
   1257 
   1258     // Compare identical level.
            // The comparison above consumed the iterators: reposition both
            // to the end of the (possibly backed-up) equal prefix first.
   1259     left.move(&left, equalPrefixLength, UITER_ZERO);
   1260     right.move(&right, equalPrefixLength, UITER_ZERO);
   1261     const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1262     if(settings->dontCheckFCD()) {
   1263         UIterNFDIterator leftIter(left);
   1264         UIterNFDIterator rightIter(right);
   1265         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1266     } else {
   1267         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
   1268         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
   1269         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1270     }
   1271 }
   1272 
   1273 CollationKey &
   1274 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
   1275                                    UErrorCode &errorCode) const {
   1276     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
   1277 }
   1278 
   1279 CollationKey &
   1280 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
   1281                                    UErrorCode &errorCode) const {
   1282     if(U_FAILURE(errorCode)) {
   1283         return key.setToBogus();
   1284     }
   1285     if(s == NULL && length != 0) {
   1286         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1287         return key.setToBogus();
   1288     }
   1289     key.reset();  // resets the "bogus" state
   1290     CollationKeyByteSink sink(key);
   1291     writeSortKey(s, length, sink, errorCode);
   1292     if(U_FAILURE(errorCode)) {
   1293         key.setToBogus();
   1294     } else if(key.isBogus()) {
   1295         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1296     } else {
   1297         key.setLength(sink.NumberOfBytesAppended());
   1298     }
   1299     return key;
   1300 }
   1301 
   1302 int32_t
   1303 RuleBasedCollator::getSortKey(const UnicodeString &s,
   1304                               uint8_t *dest, int32_t capacity) const {
   1305     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
   1306 }
   1307 
   1308 int32_t
   1309 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
   1310                               uint8_t *dest, int32_t capacity) const {
   1311     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
   1312         return 0;
   1313     }
   1314     uint8_t noDest[1] = { 0 };
   1315     if(dest == NULL) {
   1316         // Distinguish pure preflighting from an allocation error.
   1317         dest = noDest;
   1318         capacity = 0;
   1319     }
   1320     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
   1321     UErrorCode errorCode = U_ZERO_ERROR;
   1322     writeSortKey(s, length, sink, errorCode);
   1323     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
   1324 }
   1325 
   1326 void
   1327 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
   1328                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1329     if(U_FAILURE(errorCode)) { return; }
   1330     const UChar *limit = (length >= 0) ? s + length : NULL;
   1331     UBool numeric = settings->isNumeric();
   1332     CollationKeys::LevelCallback callback;
   1333     if(settings->dontCheckFCD()) {
   1334         UTF16CollationIterator iter(data, numeric, s, s, limit);
   1335         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1336                                                   sink, Collation::PRIMARY_LEVEL,
   1337                                                   callback, TRUE, errorCode);
   1338     } else {
   1339         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1340         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1341                                                   sink, Collation::PRIMARY_LEVEL,
   1342                                                   callback, TRUE, errorCode);
   1343     }
   1344     if(settings->getStrength() == UCOL_IDENTICAL) {
   1345         writeIdenticalLevel(s, limit, sink, errorCode);
   1346     }
   1347     static const char terminator = 0;  // TERMINATOR_BYTE
   1348     sink.Append(&terminator, 1);
   1349 }
   1350 
   1351 void
   1352 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
   1353                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1354     // NFD quick check
   1355     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
   1356     if(U_FAILURE(errorCode)) { return; }
   1357     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
   1358     UChar32 prev = 0;
   1359     if(nfdQCYesLimit != s) {
   1360         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
   1361     }
   1362     // Is there non-NFD text?
   1363     int32_t destLengthEstimate;
   1364     if(limit != NULL) {
   1365         if(nfdQCYesLimit == limit) { return; }
   1366         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
   1367     } else {
   1368         // s is NUL-terminated
   1369         if(*nfdQCYesLimit == 0) { return; }
   1370         destLengthEstimate = -1;
   1371     }
   1372     UnicodeString nfd;
   1373     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
   1374     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
   1375 }
   1376 
   1377 namespace {
   1378 
   1379 /**
   1380  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
   1381  * with an instance of this callback class.
   1382  * When another level is about to be written, the callback
   1383  * records the level and the number of bytes that will be written until
   1384  * the sink (which is actually a FixedSortKeyByteSink) fills up.
   1385  *
   1386  * When internalNextSortKeyPart() is called again, it restarts with the last level
   1387  * and ignores as many bytes as were written previously for that level.
   1388  */
   1389 class PartLevelCallback : public CollationKeys::LevelCallback {
   1390 public:
   1391     PartLevelCallback(const SortKeyByteSink &s)
   1392             : sink(s), level(Collation::PRIMARY_LEVEL) {
   1393         levelCapacity = sink.GetRemainingCapacity();
   1394     }
   1395     virtual ~PartLevelCallback() {}
   1396     virtual UBool needToWrite(Collation::Level l) {
   1397         if(!sink.Overflowed()) {
   1398             // Remember a level that will be at least partially written.
   1399             level = l;
   1400             levelCapacity = sink.GetRemainingCapacity();
   1401             return TRUE;
   1402         } else {
   1403             return FALSE;
   1404         }
   1405     }
   1406     Collation::Level getLevel() const { return level; }
   1407     int32_t getLevelCapacity() const { return levelCapacity; }
   1408 
   1409 private:
   1410     const SortKeyByteSink &sink;
   1411     Collation::Level level;
   1412     int32_t levelCapacity;
   1413 };
   1414 
   1415 }  // namespace
   1416 
   1417 int32_t
   1418 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
   1419                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
   1420     if(U_FAILURE(errorCode)) { return 0; }
   1421     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
   1422         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1423         return 0;
   1424     }
   1425     if(count == 0) { return 0; }
   1426 
   1427     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
   1428     sink.IgnoreBytes((int32_t)state[1]);
   1429     iter->move(iter, 0, UITER_START);
   1430 
   1431     Collation::Level level = (Collation::Level)state[0];
   1432     if(level <= Collation::QUATERNARY_LEVEL) {
   1433         UBool numeric = settings->isNumeric();
   1434         PartLevelCallback callback(sink);
   1435         if(settings->dontCheckFCD()) {
   1436             UIterCollationIterator ci(data, numeric, *iter);
   1437             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1438                                                       sink, level, callback, FALSE, errorCode);
   1439         } else {
   1440             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
   1441             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1442                                                       sink, level, callback, FALSE, errorCode);
   1443         }
   1444         if(U_FAILURE(errorCode)) { return 0; }
   1445         if(sink.NumberOfBytesAppended() > count) {
   1446             state[0] = (uint32_t)callback.getLevel();
   1447             state[1] = (uint32_t)callback.getLevelCapacity();
   1448             return count;
   1449         }
   1450         // All of the normal levels are done.
   1451         if(settings->getStrength() == UCOL_IDENTICAL) {
   1452             level = Collation::IDENTICAL_LEVEL;
   1453             iter->move(iter, 0, UITER_START);
   1454         }
   1455         // else fall through to setting ZERO_LEVEL
   1456     }
   1457 
   1458     if(level == Collation::IDENTICAL_LEVEL) {
   1459         int32_t levelCapacity = sink.GetRemainingCapacity();
   1460         UnicodeString s;
   1461         for(;;) {
   1462             UChar32 c = iter->next(iter);
   1463             if(c < 0) { break; }
   1464             s.append((UChar)c);
   1465         }
   1466         const UChar *sArray = s.getBuffer();
   1467         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
   1468         if(U_FAILURE(errorCode)) { return 0; }
   1469         if(sink.NumberOfBytesAppended() > count) {
   1470             state[0] = (uint32_t)level;
   1471             state[1] = (uint32_t)levelCapacity;
   1472             return count;
   1473         }
   1474     }
   1475 
   1476     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
   1477     state[0] = (uint32_t)Collation::ZERO_LEVEL;
   1478     state[1] = 0;
   1479     int32_t length = sink.NumberOfBytesAppended();
   1480     int32_t i = length;
   1481     while(i < count) { dest[i++] = 0; }
   1482     return length;
   1483 }
   1484 
   1485 void
   1486 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
   1487                                   UErrorCode &errorCode) const {
   1488     if(U_FAILURE(errorCode)) { return; }
   1489     const UChar *s = str.getBuffer();
   1490     const UChar *limit = s + str.length();
   1491     UBool numeric = settings->isNumeric();
   1492     if(settings->dontCheckFCD()) {
   1493         UTF16CollationIterator iter(data, numeric, s, s, limit);
   1494         int64_t ce;
   1495         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1496             ces.addElement(ce, errorCode);
   1497         }
   1498     } else {
   1499         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1500         int64_t ce;
   1501         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1502             ces.addElement(ce, errorCode);
   1503         }
   1504     }
   1505 }
   1506 
   1507 namespace {
   1508 
   1509 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
   1510                   UErrorCode &errorCode) {
   1511     if(U_FAILURE(errorCode) || length == 0) { return; }
   1512     if(!s.isEmpty()) {
   1513         s.append('_', errorCode);
   1514     }
   1515     s.append(letter, errorCode);
   1516     for(int32_t i = 0; i < length; ++i) {
   1517         s.append(uprv_toupper(subtag[i]), errorCode);
   1518     }
   1519 }
   1520 
   1521 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
   1522                      UErrorCode &errorCode) {
   1523     if(U_FAILURE(errorCode)) { return; }
   1524     if(!s.isEmpty()) {
   1525         s.append('_', errorCode);
   1526     }
   1527     static const char *valueChars = "1234...........IXO..SN..LU......";
   1528     s.append(letter, errorCode);
   1529     s.append(valueChars[value], errorCode);
   1530 }
   1531 
   1532 }  // namespace
   1533 
   1534 int32_t
   1535 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
   1536                                                     char *buffer, int32_t capacity,
   1537                                                     UErrorCode &errorCode) const {
   1538     if(U_FAILURE(errorCode)) { return 0; }
   1539     if(buffer == NULL ? capacity != 0 : capacity < 0) {
   1540         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1541         return 0;
   1542     }
   1543     if(locale == NULL) {
   1544         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
   1545     }
   1546 
   1547     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
   1548     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
   1549                                                   "collation", locale,
   1550                                                   NULL, &errorCode);
   1551     if(U_FAILURE(errorCode)) { return 0; }
   1552     if(length == 0) {
   1553         uprv_strcpy(resultLocale, "root");
   1554     } else {
   1555         resultLocale[length] = 0;
   1556     }
   1557 
   1558     // Append items in alphabetic order of their short definition letters.
   1559     CharString result;
   1560     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1561 
   1562     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
   1563         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
   1564     }
   1565     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
   1566     // See ICU tickets #10372 and #10386.
   1567     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
   1568         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
   1569     }
   1570     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
   1571         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
   1572     }
   1573     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
   1574         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
   1575     }
   1576     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
   1577         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
   1578     }
   1579     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
   1580     length = uloc_getKeywordValue(resultLocale, "collation", subtag, LENGTHOF(subtag), &errorCode);
   1581     appendSubtag(result, 'K', subtag, length, errorCode);
   1582     length = uloc_getLanguage(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
   1583     appendSubtag(result, 'L', subtag, length, errorCode);
   1584     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
   1585         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
   1586     }
   1587     length = uloc_getCountry(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
   1588     appendSubtag(result, 'R', subtag, length, errorCode);
   1589     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
   1590         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
   1591     }
   1592     length = uloc_getVariant(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
   1593     appendSubtag(result, 'V', subtag, length, errorCode);
   1594     length = uloc_getScript(resultLocale, subtag, LENGTHOF(subtag), &errorCode);
   1595     appendSubtag(result, 'Z', subtag, length, errorCode);
   1596 
   1597     if(U_FAILURE(errorCode)) { return 0; }
   1598     if(result.length() <= capacity) {
   1599         uprv_memcpy(buffer, result.data(), result.length());
   1600     }
   1601     return u_terminateChars(buffer, capacity, result.length(), &errorCode);
   1602 }
   1603 
   1604 UBool
   1605 RuleBasedCollator::isUnsafe(UChar32 c) const {
   1606     return data->isUnsafeBackward(c, settings->isNumeric());
   1607 }
   1608 
   1609 void
   1610 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
   1611     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
   1612 }
   1613 
   1614 UBool
   1615 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
   1616     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
   1617     return U_SUCCESS(errorCode);
   1618 }
   1619 
   1620 CollationElementIterator *
   1621 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
   1622     UErrorCode errorCode = U_ZERO_ERROR;
   1623     if(!initMaxExpansions(errorCode)) { return NULL; }
   1624     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1625     if(U_FAILURE(errorCode)) {
   1626         delete cei;
   1627         return NULL;
   1628     }
   1629     return cei;
   1630 }
   1631 
   1632 CollationElementIterator *
   1633 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
   1634     UErrorCode errorCode = U_ZERO_ERROR;
   1635     if(!initMaxExpansions(errorCode)) { return NULL; }
   1636     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1637     if(U_FAILURE(errorCode)) {
   1638         delete cei;
   1639         return NULL;
   1640     }
   1641     return cei;
   1642 }
   1643 
   1644 int32_t
   1645 RuleBasedCollator::getMaxExpansion(int32_t order) const {
   1646     UErrorCode errorCode = U_ZERO_ERROR;
   1647     (void)initMaxExpansions(errorCode);
   1648     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
   1649 }
   1650 
   1651 U_NAMESPACE_END
   1652 
   1653 #endif  // !UCONFIG_NO_COLLATION
   1654