Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2015, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 * rulebasedcollator.cpp
      7 *
      8 * (replaced the former tblcoll.cpp)
      9 *
     10 * created on: 2012feb14 with new and old collation code
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_COLLATION
     17 
     18 #include "unicode/coll.h"
     19 #include "unicode/coleitr.h"
     20 #include "unicode/localpointer.h"
     21 #include "unicode/locid.h"
     22 #include "unicode/sortkey.h"
     23 #include "unicode/tblcoll.h"
     24 #include "unicode/ucol.h"
     25 #include "unicode/uiter.h"
     26 #include "unicode/uloc.h"
     27 #include "unicode/uniset.h"
     28 #include "unicode/unistr.h"
     29 #include "unicode/usetiter.h"
     30 #include "unicode/utf8.h"
     31 #include "unicode/uversion.h"
     32 #include "bocsu.h"
     33 #include "charstr.h"
     34 #include "cmemory.h"
     35 #include "collation.h"
     36 #include "collationcompare.h"
     37 #include "collationdata.h"
     38 #include "collationdatareader.h"
     39 #include "collationfastlatin.h"
     40 #include "collationiterator.h"
     41 #include "collationkeys.h"
     42 #include "collationroot.h"
     43 #include "collationsets.h"
     44 #include "collationsettings.h"
     45 #include "collationtailoring.h"
     46 #include "cstring.h"
     47 #include "uassert.h"
     48 #include "ucol_imp.h"
     49 #include "uhash.h"
     50 #include "uitercollationiterator.h"
     51 #include "ustr_imp.h"
     52 #include "utf16collationiterator.h"
     53 #include "utf8collationiterator.h"
     54 #include "uvectr64.h"
     55 
     56 U_NAMESPACE_BEGIN
     57 
     58 namespace {
     59 
     60 class FixedSortKeyByteSink : public SortKeyByteSink {
     61 public:
     62     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
     63             : SortKeyByteSink(dest, destCapacity) {}
     64     virtual ~FixedSortKeyByteSink();
     65 
     66 private:
     67     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
     68     virtual UBool Resize(int32_t appendCapacity, int32_t length);
     69 };
     70 
     71 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
     72 
     73 void
     74 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
     75     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
     76     // Fill the buffer completely.
     77     int32_t available = capacity_ - length;
     78     if (available > 0) {
     79         uprv_memcpy(buffer_ + length, bytes, available);
     80     }
     81 }
     82 
     83 UBool
     84 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
     85     return FALSE;
     86 }
     87 
     88 }  // namespace
     89 
     90 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
     91 class CollationKeyByteSink : public SortKeyByteSink {
     92 public:
     93     CollationKeyByteSink(CollationKey &key)
     94             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
     95               key_(key) {}
     96     virtual ~CollationKeyByteSink();
     97 
     98 private:
     99     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
    100     virtual UBool Resize(int32_t appendCapacity, int32_t length);
    101 
    102     CollationKey &key_;
    103 };
    104 
    105 CollationKeyByteSink::~CollationKeyByteSink() {}
    106 
    107 void
    108 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
    109     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
    110     if (Resize(n, length)) {
    111         uprv_memcpy(buffer_ + length, bytes, n);
    112     }
    113 }
    114 
    115 UBool
    116 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
    117     if (buffer_ == NULL) {
    118         return FALSE;  // allocation failed before already
    119     }
    120     int32_t newCapacity = 2 * capacity_;
    121     int32_t altCapacity = length + 2 * appendCapacity;
    122     if (newCapacity < altCapacity) {
    123         newCapacity = altCapacity;
    124     }
    125     if (newCapacity < 200) {
    126         newCapacity = 200;
    127     }
    128     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
    129     if (newBuffer == NULL) {
    130         SetNotOk();
    131         return FALSE;
    132     }
    133     buffer_ = reinterpret_cast<char *>(newBuffer);
    134     capacity_ = newCapacity;
    135     return TRUE;
    136 }
    137 
    138 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
    139         : Collator(other),
    140           data(other.data),
    141           settings(other.settings),
    142           tailoring(other.tailoring),
    143           cacheEntry(other.cacheEntry),
    144           validLocale(other.validLocale),
    145           explicitlySetAttributes(other.explicitlySetAttributes),
    146           actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    147     settings->addRef();
    148     cacheEntry->addRef();
    149 }
    150 
    151 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
    152                                      const RuleBasedCollator *base, UErrorCode &errorCode)
    153         : data(NULL),
    154           settings(NULL),
    155           tailoring(NULL),
    156           cacheEntry(NULL),
    157           validLocale(""),
    158           explicitlySetAttributes(0),
    159           actualLocaleIsSameAsValid(FALSE) {
    160     if(U_FAILURE(errorCode)) { return; }
    161     if(bin == NULL || length == 0 || base == NULL) {
    162         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    163         return;
    164     }
    165     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    166     if(U_FAILURE(errorCode)) { return; }
    167     if(base->tailoring != root) {
    168         errorCode = U_UNSUPPORTED_ERROR;
    169         return;
    170     }
    171     LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
    172     if(t.isNull() || t->isBogus()) {
    173         errorCode = U_MEMORY_ALLOCATION_ERROR;
    174         return;
    175     }
    176     CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
    177     if(U_FAILURE(errorCode)) { return; }
    178     t->actualLocale.setToBogus();
    179     adoptTailoring(t.orphan(), errorCode);
    180 }
    181 
    182 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
    183         : data(entry->tailoring->data),
    184           settings(entry->tailoring->settings),
    185           tailoring(entry->tailoring),
    186           cacheEntry(entry),
    187           validLocale(entry->validLocale),
    188           explicitlySetAttributes(0),
    189           actualLocaleIsSameAsValid(FALSE) {
    190     settings->addRef();
    191     cacheEntry->addRef();
    192 }
    193 
    194 RuleBasedCollator::~RuleBasedCollator() {
    195     SharedObject::clearPtr(settings);
    196     SharedObject::clearPtr(cacheEntry);
    197 }
    198 
    199 void
    200 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
    201     if(U_FAILURE(errorCode)) {
    202         t->deleteIfZeroRefCount();
    203         return;
    204     }
    205     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
    206     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
    207     if(cacheEntry == NULL) {
    208         errorCode = U_MEMORY_ALLOCATION_ERROR;
    209         t->deleteIfZeroRefCount();
    210         return;
    211     }
    212     data = t->data;
    213     settings = t->settings;
    214     settings->addRef();
    215     tailoring = t;
    216     cacheEntry->addRef();
    217     validLocale = t->actualLocale;
    218     actualLocaleIsSameAsValid = FALSE;
    219 }
    220 
    221 Collator *
    222 RuleBasedCollator::clone() const {
    223     return new RuleBasedCollator(*this);
    224 }
    225 
    226 RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    227     if(this == &other) { return *this; }
    228     SharedObject::copyPtr(other.settings, settings);
    229     tailoring = other.tailoring;
    230     SharedObject::copyPtr(other.cacheEntry, cacheEntry);
    231     data = tailoring->data;
    232     validLocale = other.validLocale;
    233     explicitlySetAttributes = other.explicitlySetAttributes;
    234     actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    235     return *this;
    236 }
    237 
    238 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
    239 
    240 UBool
    241 RuleBasedCollator::operator==(const Collator& other) const {
    242     if(this == &other) { return TRUE; }
    243     if(!Collator::operator==(other)) { return FALSE; }
    244     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
    245     if(*settings != *o.settings) { return FALSE; }
    246     if(data == o.data) { return TRUE; }
    247     UBool thisIsRoot = data->base == NULL;
    248     UBool otherIsRoot = o.data->base == NULL;
    249     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
    250     if(thisIsRoot != otherIsRoot) { return FALSE; }
    251     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
    252             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
    253         // Shortcut: If both collators have valid rule strings, then compare those.
    254         if(tailoring->rules == o.tailoring->rules) { return TRUE; }
    255     }
    256     // Different rule strings can result in the same or equivalent tailoring.
    257     // The rule strings are optional in ICU resource bundles, although included by default.
    258     // cloneBinary() drops the rule string.
    259     UErrorCode errorCode = U_ZERO_ERROR;
    260     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
    261     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
    262     if(U_FAILURE(errorCode)) { return FALSE; }
    263     if(*thisTailored != *otherTailored) { return FALSE; }
    264     // For completeness, we should compare all of the mappings;
    265     // or we should create a list of strings, sort it with one collator,
    266     // and check if both collators compare adjacent strings the same
    267     // (order & strength, down to quaternary); or similar.
    268     // Testing equality of collators seems unusual.
    269     return TRUE;
    270 }
    271 
    272 int32_t
    273 RuleBasedCollator::hashCode() const {
    274     int32_t h = settings->hashCode();
    275     if(data->base == NULL) { return h; }  // root collator
    276     // Do not rely on the rule string, see comments in operator==().
    277     UErrorCode errorCode = U_ZERO_ERROR;
    278     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
    279     if(U_FAILURE(errorCode)) { return 0; }
    280     UnicodeSetIterator iter(*set);
    281     while(iter.next() && !iter.isString()) {
    282         h ^= data->getCE32(iter.getCodepoint());
    283     }
    284     return h;
    285 }
    286 
    287 void
    288 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
    289                               const Locale &actual) {
    290     if(actual == tailoring->actualLocale) {
    291         actualLocaleIsSameAsValid = FALSE;
    292     } else {
    293         U_ASSERT(actual == valid);
    294         actualLocaleIsSameAsValid = TRUE;
    295     }
    296     // Do not modify tailoring.actualLocale:
    297     // We cannot be sure that that would be thread-safe.
    298     validLocale = valid;
    299     (void)requested;  // Ignore, see also ticket #10477.
    300 }
    301 
    302 Locale
    303 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
    304     if(U_FAILURE(errorCode)) {
    305         return Locale::getRoot();
    306     }
    307     switch(type) {
    308     case ULOC_ACTUAL_LOCALE:
    309         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
    310     case ULOC_VALID_LOCALE:
    311         return validLocale;
    312     case ULOC_REQUESTED_LOCALE:
    313     default:
    314         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    315         return Locale::getRoot();
    316     }
    317 }
    318 
    319 const char *
    320 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
    321     if(U_FAILURE(errorCode)) {
    322         return NULL;
    323     }
    324     const Locale *result;
    325     switch(type) {
    326     case ULOC_ACTUAL_LOCALE:
    327         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
    328         break;
    329     case ULOC_VALID_LOCALE:
    330         result = &validLocale;
    331         break;
    332     case ULOC_REQUESTED_LOCALE:
    333     default:
    334         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    335         return NULL;
    336     }
    337     if(result->isBogus()) { return NULL; }
    338     const char *id = result->getName();
    339     return id[0] == 0 ? "root" : id;
    340 }
    341 
    342 const UnicodeString&
    343 RuleBasedCollator::getRules() const {
    344     return tailoring->rules;
    345 }
    346 
    347 void
    348 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
    349     if(delta == UCOL_TAILORING_ONLY) {
    350         buffer = tailoring->rules;
    351         return;
    352     }
    353     // UCOL_FULL_RULES
    354     buffer.remove();
    355     CollationLoader::appendRootRules(buffer);
    356     buffer.append(tailoring->rules).getTerminatedBuffer();
    357 }
    358 
    359 void
    360 RuleBasedCollator::getVersion(UVersionInfo version) const {
    361     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
    362     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
    363 }
    364 
    365 UnicodeSet *
    366 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
    367     if(U_FAILURE(errorCode)) { return NULL; }
    368     UnicodeSet *tailored = new UnicodeSet();
    369     if(tailored == NULL) {
    370         errorCode = U_MEMORY_ALLOCATION_ERROR;
    371         return NULL;
    372     }
    373     if(data->base != NULL) {
    374         TailoredSet(tailored).forData(data, errorCode);
    375         if(U_FAILURE(errorCode)) {
    376             delete tailored;
    377             return NULL;
    378         }
    379     }
    380     return tailored;
    381 }
    382 
    383 void
    384 RuleBasedCollator::internalGetContractionsAndExpansions(
    385         UnicodeSet *contractions, UnicodeSet *expansions,
    386         UBool addPrefixes, UErrorCode &errorCode) const {
    387     if(U_FAILURE(errorCode)) { return; }
    388     if(contractions != NULL) {
    389         contractions->clear();
    390     }
    391     if(expansions != NULL) {
    392         expansions->clear();
    393     }
    394     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
    395 }
    396 
    397 void
    398 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
    399     if(U_FAILURE(errorCode)) { return; }
    400     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
    401 }
    402 
    403 const CollationSettings &
    404 RuleBasedCollator::getDefaultSettings() const {
    405     return *tailoring->settings;
    406 }
    407 
    408 UColAttributeValue
    409 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
    410     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
    411     int32_t option;
    412     switch(attr) {
    413     case UCOL_FRENCH_COLLATION:
    414         option = CollationSettings::BACKWARD_SECONDARY;
    415         break;
    416     case UCOL_ALTERNATE_HANDLING:
    417         return settings->getAlternateHandling();
    418     case UCOL_CASE_FIRST:
    419         return settings->getCaseFirst();
    420     case UCOL_CASE_LEVEL:
    421         option = CollationSettings::CASE_LEVEL;
    422         break;
    423     case UCOL_NORMALIZATION_MODE:
    424         option = CollationSettings::CHECK_FCD;
    425         break;
    426     case UCOL_STRENGTH:
    427         return (UColAttributeValue)settings->getStrength();
    428     case UCOL_HIRAGANA_QUATERNARY_MODE:
    429         // Deprecated attribute, unsettable.
    430         return UCOL_OFF;
    431     case UCOL_NUMERIC_COLLATION:
    432         option = CollationSettings::NUMERIC;
    433         break;
    434     default:
    435         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    436         return UCOL_DEFAULT;
    437     }
    438     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
    439 }
    440 
    441 void
    442 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
    443                                 UErrorCode &errorCode) {
    444     UColAttributeValue oldValue = getAttribute(attr, errorCode);
    445     if(U_FAILURE(errorCode)) { return; }
    446     if(value == oldValue) {
    447         setAttributeExplicitly(attr);
    448         return;
    449     }
    450     const CollationSettings &defaultSettings = getDefaultSettings();
    451     if(settings == &defaultSettings) {
    452         if(value == UCOL_DEFAULT) {
    453             setAttributeDefault(attr);
    454             return;
    455         }
    456     }
    457     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    458     if(ownedSettings == NULL) {
    459         errorCode = U_MEMORY_ALLOCATION_ERROR;
    460         return;
    461     }
    462 
    463     switch(attr) {
    464     case UCOL_FRENCH_COLLATION:
    465         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
    466                                defaultSettings.options, errorCode);
    467         break;
    468     case UCOL_ALTERNATE_HANDLING:
    469         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
    470         break;
    471     case UCOL_CASE_FIRST:
    472         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
    473         break;
    474     case UCOL_CASE_LEVEL:
    475         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
    476                                defaultSettings.options, errorCode);
    477         break;
    478     case UCOL_NORMALIZATION_MODE:
    479         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
    480                                defaultSettings.options, errorCode);
    481         break;
    482     case UCOL_STRENGTH:
    483         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
    484         break;
    485     case UCOL_HIRAGANA_QUATERNARY_MODE:
    486         // Deprecated attribute. Check for valid values but do not change anything.
    487         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
    488             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    489         }
    490         break;
    491     case UCOL_NUMERIC_COLLATION:
    492         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
    493         break;
    494     default:
    495         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    496         break;
    497     }
    498     if(U_FAILURE(errorCode)) { return; }
    499     setFastLatinOptions(*ownedSettings);
    500     if(value == UCOL_DEFAULT) {
    501         setAttributeDefault(attr);
    502     } else {
    503         setAttributeExplicitly(attr);
    504     }
    505 }
    506 
    507 Collator &
    508 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
    509     if(U_FAILURE(errorCode)) { return *this; }
    510     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    511     int32_t value;
    512     if(group == UCOL_REORDER_CODE_DEFAULT) {
    513         value = UCOL_DEFAULT;
    514     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
    515         value = group - UCOL_REORDER_CODE_FIRST;
    516     } else {
    517         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    518         return *this;
    519     }
    520     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
    521     if(value == oldValue) {
    522         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    523         return *this;
    524     }
    525     const CollationSettings &defaultSettings = getDefaultSettings();
    526     if(settings == &defaultSettings) {
    527         if(value == UCOL_DEFAULT) {
    528             setAttributeDefault(ATTR_VARIABLE_TOP);
    529             return *this;
    530         }
    531     }
    532     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    533     if(ownedSettings == NULL) {
    534         errorCode = U_MEMORY_ALLOCATION_ERROR;
    535         return *this;
    536     }
    537 
    538     if(group == UCOL_REORDER_CODE_DEFAULT) {
    539         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
    540     }
    541     uint32_t varTop = data->getLastPrimaryForGroup(group);
    542     U_ASSERT(varTop != 0);
    543     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
    544     if(U_FAILURE(errorCode)) { return *this; }
    545     ownedSettings->variableTop = varTop;
    546     setFastLatinOptions(*ownedSettings);
    547     if(value == UCOL_DEFAULT) {
    548         setAttributeDefault(ATTR_VARIABLE_TOP);
    549     } else {
    550         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    551     }
    552     return *this;
    553 }
    554 
    555 UColReorderCode
    556 RuleBasedCollator::getMaxVariable() const {
    557     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
    558 }
    559 
    560 uint32_t
    561 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
    562     return settings->variableTop;
    563 }
    564 
    565 uint32_t
    566 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
    567     if(U_FAILURE(errorCode)) { return 0; }
    568     if(varTop == NULL && len !=0) {
    569         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    570         return 0;
    571     }
    572     if(len < 0) { len = u_strlen(varTop); }
    573     if(len == 0) {
    574         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    575         return 0;
    576     }
    577     UBool numeric = settings->isNumeric();
    578     int64_t ce1, ce2;
    579     if(settings->dontCheckFCD()) {
    580         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    581         ce1 = ci.nextCE(errorCode);
    582         ce2 = ci.nextCE(errorCode);
    583     } else {
    584         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    585         ce1 = ci.nextCE(errorCode);
    586         ce2 = ci.nextCE(errorCode);
    587     }
    588     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
    589         errorCode = U_CE_NOT_FOUND_ERROR;
    590         return 0;
    591     }
    592     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
    593     return settings->variableTop;
    594 }
    595 
    596 uint32_t
    597 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
    598     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
    599 }
    600 
    601 void
    602 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
    603     if(U_FAILURE(errorCode)) { return; }
    604     if(varTop != settings->variableTop) {
    605         // Pin the variable top to the end of the reordering group which contains it.
    606         // Only a few special groups are supported.
    607         int32_t group = data->getGroupForPrimary(varTop);
    608         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
    609             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    610             return;
    611         }
    612         uint32_t v = data->getLastPrimaryForGroup(group);
    613         U_ASSERT(v != 0 && v >= varTop);
    614         varTop = v;
    615         if(varTop != settings->variableTop) {
    616             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    617             if(ownedSettings == NULL) {
    618                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    619                 return;
    620             }
    621             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
    622                                           getDefaultSettings().options, errorCode);
    623             if(U_FAILURE(errorCode)) { return; }
    624             ownedSettings->variableTop = varTop;
    625             setFastLatinOptions(*ownedSettings);
    626         }
    627     }
    628     if(varTop == getDefaultSettings().variableTop) {
    629         setAttributeDefault(ATTR_VARIABLE_TOP);
    630     } else {
    631         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    632     }
    633 }
    634 
    635 int32_t
    636 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
    637                                    UErrorCode &errorCode) const {
    638     if(U_FAILURE(errorCode)) { return 0; }
    639     if(capacity < 0 || (dest == NULL && capacity > 0)) {
    640         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    641         return 0;
    642     }
    643     int32_t length = settings->reorderCodesLength;
    644     if(length == 0) { return 0; }
    645     if(length > capacity) {
    646         errorCode = U_BUFFER_OVERFLOW_ERROR;
    647         return length;
    648     }
    649     uprv_memcpy(dest, settings->reorderCodes, length * 4);
    650     return length;
    651 }
    652 
    653 void
    654 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
    655                                    UErrorCode &errorCode) {
    656     if(U_FAILURE(errorCode)) { return; }
    657     if(length < 0 || (reorderCodes == NULL && length > 0)) {
    658         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    659         return;
    660     }
    661     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
    662         length = 0;
    663     }
    664     if(length == settings->reorderCodesLength &&
    665             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
    666         return;
    667     }
    668     const CollationSettings &defaultSettings = getDefaultSettings();
    669     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
    670         if(settings != &defaultSettings) {
    671             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    672             if(ownedSettings == NULL) {
    673                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    674                 return;
    675             }
    676             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
    677             setFastLatinOptions(*ownedSettings);
    678         }
    679         return;
    680     }
    681     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    682     if(ownedSettings == NULL) {
    683         errorCode = U_MEMORY_ALLOCATION_ERROR;
    684         return;
    685     }
    686     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
    687     setFastLatinOptions(*ownedSettings);
    688 }
    689 
    690 void
    691 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
    692     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
    693             data, ownedSettings,
    694             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
    695 }
    696 
    697 UCollationResult
    698 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    699                            UErrorCode &errorCode) const {
    700     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    701     return doCompare(left.getBuffer(), left.length(),
    702                      right.getBuffer(), right.length(), errorCode);
    703 }
    704 
    705 UCollationResult
    706 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    707                            int32_t length, UErrorCode &errorCode) const {
    708     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
    709     if(length < 0) {
    710         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    711         return UCOL_EQUAL;
    712     }
    713     int32_t leftLength = left.length();
    714     int32_t rightLength = right.length();
    715     if(leftLength > length) { leftLength = length; }
    716     if(rightLength > length) { rightLength = length; }
    717     return doCompare(left.getBuffer(), leftLength,
    718                      right.getBuffer(), rightLength, errorCode);
    719 }
    720 
    721 UCollationResult
    722 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
    723                            const UChar *right, int32_t rightLength,
    724                            UErrorCode &errorCode) const {
    725     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    726     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
    727         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    728         return UCOL_EQUAL;
    729     }
    730     // Make sure both or neither strings have a known length.
    731     // We do not optimize for mixed length/termination.
    732     if(leftLength >= 0) {
    733         if(rightLength < 0) { rightLength = u_strlen(right); }
    734     } else {
    735         if(rightLength >= 0) { leftLength = u_strlen(left); }
    736     }
    737     return doCompare(left, leftLength, right, rightLength, errorCode);
    738 }
    739 
    740 UCollationResult
    741 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
    742                                UErrorCode &errorCode) const {
    743     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    744     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
    745     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
    746     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
    747         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    748         return UCOL_EQUAL;
    749     }
    750     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
    751 }
    752 
    753 UCollationResult
    754 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
    755                                        const char *right, int32_t rightLength,
    756                                        UErrorCode &errorCode) const {
    757     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    758     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
    759         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    760         return UCOL_EQUAL;
    761     }
    762     // Make sure both or neither strings have a known length.
    763     // We do not optimize for mixed length/termination.
    764     if(leftLength >= 0) {
    765         if(rightLength < 0) { rightLength = uprv_strlen(right); }
    766     } else {
    767         if(rightLength >= 0) { leftLength = uprv_strlen(left); }
    768     }
    769     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
    770                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
    771 }
    772 
    773 namespace {
    774 
    775 /**
    776  * Abstract iterator for identical-level string comparisons.
    777  * Returns FCD code points and handles temporary switching to NFD.
    778  */
    779 class NFDIterator : public UObject {
    780 public:
    781     NFDIterator() : index(-1), length(0) {}
    782     virtual ~NFDIterator() {}
    783     /**
    784      * Returns the next code point from the internal normalization buffer,
    785      * or else the next text code point.
    786      * Returns -1 at the end of the text.
    787      */
    788     UChar32 nextCodePoint() {
    789         if(index >= 0) {
    790             if(index == length) {
    791                 index = -1;
    792             } else {
    793                 UChar32 c;
    794                 U16_NEXT_UNSAFE(decomp, index, c);
    795                 return c;
    796             }
    797         }
    798         return nextRawCodePoint();
    799     }
    800     /**
    801      * @param nfcImpl
    802      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
    803      * @return the first code point in c's decomposition,
    804      *         or c itself if it was decomposed already or if it does not decompose
    805      */
    806     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
    807         if(index >= 0) { return c; }
    808         decomp = nfcImpl.getDecomposition(c, buffer, length);
    809         if(decomp == NULL) { return c; }
    810         index = 0;
    811         U16_NEXT_UNSAFE(decomp, index, c);
    812         return c;
    813     }
    814 protected:
    815     /**
    816      * Returns the next text code point in FCD order.
    817      * Returns -1 at the end of the text.
    818      */
    819     virtual UChar32 nextRawCodePoint() = 0;
    820 private:
    821     const UChar *decomp;
    822     UChar buffer[4];
    823     int32_t index;
    824     int32_t length;
    825 };
    826 
    827 class UTF16NFDIterator : public NFDIterator {
    828 public:
    829     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
    830 protected:
    831     virtual UChar32 nextRawCodePoint() {
    832         if(s == limit) { return U_SENTINEL; }
    833         UChar32 c = *s++;
    834         if(limit == NULL && c == 0) {
    835             s = NULL;
    836             return U_SENTINEL;
    837         }
    838         UChar trail;
    839         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
    840             ++s;
    841             c = U16_GET_SUPPLEMENTARY(c, trail);
    842         }
    843         return c;
    844     }
    845 
    846     const UChar *s;
    847     const UChar *limit;
    848 };
    849 
    850 class FCDUTF16NFDIterator : public UTF16NFDIterator {
    851 public:
    852     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
    853             : UTF16NFDIterator(NULL, NULL) {
    854         UErrorCode errorCode = U_ZERO_ERROR;
    855         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
    856         if(U_FAILURE(errorCode)) { return; }
    857         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
    858             s = text;
    859             limit = spanLimit;
    860         } else {
    861             str.setTo(text, (int32_t)(spanLimit - text));
    862             {
    863                 ReorderingBuffer buffer(nfcImpl, str);
    864                 if(buffer.init(str.length(), errorCode)) {
    865                     nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
    866                 }
    867             }
    868             if(U_SUCCESS(errorCode)) {
    869                 s = str.getBuffer();
    870                 limit = s + str.length();
    871             }
    872         }
    873     }
    874 private:
    875     UnicodeString str;
    876 };
    877 
    878 class UTF8NFDIterator : public NFDIterator {
    879 public:
    880     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
    881         : s(text), pos(0), length(textLength) {}
    882 protected:
    883     virtual UChar32 nextRawCodePoint() {
    884         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
    885         UChar32 c;
    886         U8_NEXT_OR_FFFD(s, pos, length, c);
    887         return c;
    888     }
    889 
    890     const uint8_t *s;
    891     int32_t pos;
    892     int32_t length;
    893 };
    894 
    895 class FCDUTF8NFDIterator : public NFDIterator {
    896 public:
    897     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
    898             : u8ci(data, FALSE, text, 0, textLength) {}
    899 protected:
    900     virtual UChar32 nextRawCodePoint() {
    901         UErrorCode errorCode = U_ZERO_ERROR;
    902         return u8ci.nextCodePoint(errorCode);
    903     }
    904 private:
    905     FCDUTF8CollationIterator u8ci;
    906 };
    907 
    908 class UIterNFDIterator : public NFDIterator {
    909 public:
    910     UIterNFDIterator(UCharIterator &it) : iter(it) {}
    911 protected:
    912     virtual UChar32 nextRawCodePoint() {
    913         return uiter_next32(&iter);
    914     }
    915 private:
    916     UCharIterator &iter;
    917 };
    918 
    919 class FCDUIterNFDIterator : public NFDIterator {
    920 public:
    921     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
    922             : uici(data, FALSE, it, startIndex) {}
    923 protected:
    924     virtual UChar32 nextRawCodePoint() {
    925         UErrorCode errorCode = U_ZERO_ERROR;
    926         return uici.nextCodePoint(errorCode);
    927     }
    928 private:
    929     FCDUIterCollationIterator uici;
    930 };
    931 
    932 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
    933                                 NFDIterator &left, NFDIterator &right) {
    934     for(;;) {
    935         // Fetch the next FCD code point from each string.
    936         UChar32 leftCp = left.nextCodePoint();
    937         UChar32 rightCp = right.nextCodePoint();
    938         if(leftCp == rightCp) {
    939             if(leftCp < 0) { break; }
    940             continue;
    941         }
    942         // If they are different, then decompose each and compare again.
    943         if(leftCp < 0) {
    944             leftCp = -2;  // end of string
    945         } else if(leftCp == 0xfffe) {
    946             leftCp = -1;  // U+FFFE: merge separator
    947         } else {
    948             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
    949         }
    950         if(rightCp < 0) {
    951             rightCp = -2;  // end of string
    952         } else if(rightCp == 0xfffe) {
    953             rightCp = -1;  // U+FFFE: merge separator
    954         } else {
    955             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
    956         }
    957         if(leftCp < rightCp) { return UCOL_LESS; }
    958         if(leftCp > rightCp) { return UCOL_GREATER; }
    959     }
    960     return UCOL_EQUAL;
    961 }
    962 
    963 }  // namespace
    964 
    965 UCollationResult
    966 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
    967                              const UChar *right, int32_t rightLength,
    968                              UErrorCode &errorCode) const {
    969     // U_FAILURE(errorCode) checked by caller.
    970     if(left == right && leftLength == rightLength) {
    971         return UCOL_EQUAL;
    972     }
    973 
    974     // Identical-prefix test.
    975     const UChar *leftLimit;
    976     const UChar *rightLimit;
    977     int32_t equalPrefixLength = 0;
    978     if(leftLength < 0) {
    979         leftLimit = NULL;
    980         rightLimit = NULL;
    981         UChar c;
    982         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
    983             if(c == 0) { return UCOL_EQUAL; }
    984             ++equalPrefixLength;
    985         }
    986     } else {
    987         leftLimit = left + leftLength;
    988         rightLimit = right + rightLength;
    989         for(;;) {
    990             if(equalPrefixLength == leftLength) {
    991                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
    992                 break;
    993             } else if(equalPrefixLength == rightLength ||
    994                       left[equalPrefixLength] != right[equalPrefixLength]) {
    995                 break;
    996             }
    997             ++equalPrefixLength;
    998         }
    999     }
   1000 
   1001     UBool numeric = settings->isNumeric();
   1002     if(equalPrefixLength > 0) {
   1003         if((equalPrefixLength != leftLength &&
   1004                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
   1005                 (equalPrefixLength != rightLength &&
   1006                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
   1007             // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1008             while(--equalPrefixLength > 0 &&
   1009                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
   1010         }
   1011         // Notes:
   1012         // - A longer string can compare equal to a prefix of it if only ignorables follow.
   1013         // - With a backward level, a longer string can compare less-than a prefix of it.
   1014 
   1015         // Pass the actual start of each string into the CollationIterators,
   1016         // plus the equalPrefixLength position,
   1017         // so that prefix matches back into the equal prefix work.
   1018     }
   1019 
   1020     int32_t result;
   1021     int32_t fastLatinOptions = settings->fastLatinOptions;
   1022     if(fastLatinOptions >= 0 &&
   1023             (equalPrefixLength == leftLength ||
   1024                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
   1025             (equalPrefixLength == rightLength ||
   1026                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
   1027         if(leftLength >= 0) {
   1028             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
   1029                                                       settings->fastLatinPrimaries,
   1030                                                       fastLatinOptions,
   1031                                                       left + equalPrefixLength,
   1032                                                       leftLength - equalPrefixLength,
   1033                                                       right + equalPrefixLength,
   1034                                                       rightLength - equalPrefixLength);
   1035         } else {
   1036             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
   1037                                                       settings->fastLatinPrimaries,
   1038                                                       fastLatinOptions,
   1039                                                       left + equalPrefixLength, -1,
   1040                                                       right + equalPrefixLength, -1);
   1041         }
   1042     } else {
   1043         result = CollationFastLatin::BAIL_OUT_RESULT;
   1044     }
   1045 
   1046     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
   1047         if(settings->dontCheckFCD()) {
   1048             UTF16CollationIterator leftIter(data, numeric,
   1049                                             left, left + equalPrefixLength, leftLimit);
   1050             UTF16CollationIterator rightIter(data, numeric,
   1051                                             right, right + equalPrefixLength, rightLimit);
   1052             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1053         } else {
   1054             FCDUTF16CollationIterator leftIter(data, numeric,
   1055                                               left, left + equalPrefixLength, leftLimit);
   1056             FCDUTF16CollationIterator rightIter(data, numeric,
   1057                                                 right, right + equalPrefixLength, rightLimit);
   1058             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1059         }
   1060     }
   1061     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1062         return (UCollationResult)result;
   1063     }
   1064 
   1065     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
   1066     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
   1067     // and the benefit seems unlikely to be measurable.
   1068 
   1069     // Compare identical level.
   1070     const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1071     left += equalPrefixLength;
   1072     right += equalPrefixLength;
   1073     if(settings->dontCheckFCD()) {
   1074         UTF16NFDIterator leftIter(left, leftLimit);
   1075         UTF16NFDIterator rightIter(right, rightLimit);
   1076         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1077     } else {
   1078         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
   1079         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
   1080         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1081     }
   1082 }
   1083 
   1084 UCollationResult
   1085 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
   1086                              const uint8_t *right, int32_t rightLength,
   1087                              UErrorCode &errorCode) const {
   1088     // U_FAILURE(errorCode) checked by caller.
   1089     if(left == right && leftLength == rightLength) {
   1090         return UCOL_EQUAL;
   1091     }
   1092 
   1093     // Identical-prefix test.
   1094     int32_t equalPrefixLength = 0;
   1095     if(leftLength < 0) {
   1096         uint8_t c;
   1097         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
   1098             if(c == 0) { return UCOL_EQUAL; }
   1099             ++equalPrefixLength;
   1100         }
   1101     } else {
   1102         for(;;) {
   1103             if(equalPrefixLength == leftLength) {
   1104                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
   1105                 break;
   1106             } else if(equalPrefixLength == rightLength ||
   1107                       left[equalPrefixLength] != right[equalPrefixLength]) {
   1108                 break;
   1109             }
   1110             ++equalPrefixLength;
   1111         }
   1112     }
   1113     // Back up to the start of a partially-equal code point.
   1114     if(equalPrefixLength > 0 &&
   1115             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
   1116             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
   1117         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
   1118     }
   1119 
   1120     UBool numeric = settings->isNumeric();
   1121     if(equalPrefixLength > 0) {
   1122         UBool unsafe = FALSE;
   1123         if(equalPrefixLength != leftLength) {
   1124             int32_t i = equalPrefixLength;
   1125             UChar32 c;
   1126             U8_NEXT_OR_FFFD(left, i, leftLength, c);
   1127             unsafe = data->isUnsafeBackward(c, numeric);
   1128         }
   1129         if(!unsafe && equalPrefixLength != rightLength) {
   1130             int32_t i = equalPrefixLength;
   1131             UChar32 c;
   1132             U8_NEXT_OR_FFFD(right, i, rightLength, c);
   1133             unsafe = data->isUnsafeBackward(c, numeric);
   1134         }
   1135         if(unsafe) {
   1136             // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1137             UChar32 c;
   1138             do {
   1139                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
   1140             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
   1141         }
   1142         // See the notes in the UTF-16 version.
   1143 
   1144         // Pass the actual start of each string into the CollationIterators,
   1145         // plus the equalPrefixLength position,
   1146         // so that prefix matches back into the equal prefix work.
   1147     }
   1148 
   1149     int32_t result;
   1150     int32_t fastLatinOptions = settings->fastLatinOptions;
   1151     if(fastLatinOptions >= 0 &&
   1152             (equalPrefixLength == leftLength ||
   1153                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
   1154             (equalPrefixLength == rightLength ||
   1155                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
   1156         if(leftLength >= 0) {
   1157             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
   1158                                                      settings->fastLatinPrimaries,
   1159                                                      fastLatinOptions,
   1160                                                      left + equalPrefixLength,
   1161                                                      leftLength - equalPrefixLength,
   1162                                                      right + equalPrefixLength,
   1163                                                      rightLength - equalPrefixLength);
   1164         } else {
   1165             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
   1166                                                      settings->fastLatinPrimaries,
   1167                                                      fastLatinOptions,
   1168                                                      left + equalPrefixLength, -1,
   1169                                                      right + equalPrefixLength, -1);
   1170         }
   1171     } else {
   1172         result = CollationFastLatin::BAIL_OUT_RESULT;
   1173     }
   1174 
   1175     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
   1176         if(settings->dontCheckFCD()) {
   1177             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
   1178             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
   1179             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1180         } else {
   1181             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
   1182             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
   1183             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1184         }
   1185     }
   1186     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1187         return (UCollationResult)result;
   1188     }
   1189 
   1190     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
   1191     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
   1192     // and the benefit seems unlikely to be measurable.
   1193 
   1194     // Compare identical level.
   1195     const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1196     left += equalPrefixLength;
   1197     right += equalPrefixLength;
   1198     if(leftLength > 0) {
   1199         leftLength -= equalPrefixLength;
   1200         rightLength -= equalPrefixLength;
   1201     }
   1202     if(settings->dontCheckFCD()) {
   1203         UTF8NFDIterator leftIter(left, leftLength);
   1204         UTF8NFDIterator rightIter(right, rightLength);
   1205         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1206     } else {
   1207         FCDUTF8NFDIterator leftIter(data, left, leftLength);
   1208         FCDUTF8NFDIterator rightIter(data, right, rightLength);
   1209         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1210     }
   1211 }
   1212 
   1213 UCollationResult
   1214 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
   1215                            UErrorCode &errorCode) const {
   1216     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
   1217     UBool numeric = settings->isNumeric();
   1218 
   1219     // Identical-prefix test.
   1220     int32_t equalPrefixLength = 0;
   1221     {
   1222         UChar32 leftUnit;
   1223         UChar32 rightUnit;
   1224         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
   1225             if(leftUnit < 0) { return UCOL_EQUAL; }
   1226             ++equalPrefixLength;
   1227         }
   1228 
   1229         // Back out the code units that differed, for the real collation comparison.
   1230         if(leftUnit >= 0) { left.previous(&left); }
   1231         if(rightUnit >= 0) { right.previous(&right); }
   1232 
   1233         if(equalPrefixLength > 0) {
   1234             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
   1235                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
   1236                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1237                 do {
   1238                     --equalPrefixLength;
   1239                     leftUnit = left.previous(&left);
   1240                     right.previous(&right);
   1241                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
   1242             }
   1243             // See the notes in the UTF-16 version.
   1244         }
   1245     }
   1246 
   1247     UCollationResult result;
   1248     if(settings->dontCheckFCD()) {
   1249         UIterCollationIterator leftIter(data, numeric, left);
   1250         UIterCollationIterator rightIter(data, numeric, right);
   1251         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1252     } else {
   1253         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
   1254         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
   1255         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1256     }
   1257     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1258         return result;
   1259     }
   1260 
   1261     // Compare identical level.
   1262     left.move(&left, equalPrefixLength, UITER_ZERO);
   1263     right.move(&right, equalPrefixLength, UITER_ZERO);
   1264     const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1265     if(settings->dontCheckFCD()) {
   1266         UIterNFDIterator leftIter(left);
   1267         UIterNFDIterator rightIter(right);
   1268         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1269     } else {
   1270         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
   1271         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
   1272         return compareNFDIter(nfcImpl, leftIter, rightIter);
   1273     }
   1274 }
   1275 
   1276 CollationKey &
   1277 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
   1278                                    UErrorCode &errorCode) const {
   1279     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
   1280 }
   1281 
   1282 CollationKey &
   1283 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
   1284                                    UErrorCode &errorCode) const {
   1285     if(U_FAILURE(errorCode)) {
   1286         return key.setToBogus();
   1287     }
   1288     if(s == NULL && length != 0) {
   1289         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1290         return key.setToBogus();
   1291     }
   1292     key.reset();  // resets the "bogus" state
   1293     CollationKeyByteSink sink(key);
   1294     writeSortKey(s, length, sink, errorCode);
   1295     if(U_FAILURE(errorCode)) {
   1296         key.setToBogus();
   1297     } else if(key.isBogus()) {
   1298         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1299     } else {
   1300         key.setLength(sink.NumberOfBytesAppended());
   1301     }
   1302     return key;
   1303 }
   1304 
   1305 int32_t
   1306 RuleBasedCollator::getSortKey(const UnicodeString &s,
   1307                               uint8_t *dest, int32_t capacity) const {
   1308     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
   1309 }
   1310 
   1311 int32_t
   1312 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
   1313                               uint8_t *dest, int32_t capacity) const {
   1314     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
   1315         return 0;
   1316     }
   1317     uint8_t noDest[1] = { 0 };
   1318     if(dest == NULL) {
   1319         // Distinguish pure preflighting from an allocation error.
   1320         dest = noDest;
   1321         capacity = 0;
   1322     }
   1323     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
   1324     UErrorCode errorCode = U_ZERO_ERROR;
   1325     writeSortKey(s, length, sink, errorCode);
   1326     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
   1327 }
   1328 
   1329 void
   1330 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
   1331                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1332     if(U_FAILURE(errorCode)) { return; }
   1333     const UChar *limit = (length >= 0) ? s + length : NULL;
   1334     UBool numeric = settings->isNumeric();
   1335     CollationKeys::LevelCallback callback;
   1336     if(settings->dontCheckFCD()) {
   1337         UTF16CollationIterator iter(data, numeric, s, s, limit);
   1338         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1339                                                   sink, Collation::PRIMARY_LEVEL,
   1340                                                   callback, TRUE, errorCode);
   1341     } else {
   1342         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1343         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1344                                                   sink, Collation::PRIMARY_LEVEL,
   1345                                                   callback, TRUE, errorCode);
   1346     }
   1347     if(settings->getStrength() == UCOL_IDENTICAL) {
   1348         writeIdenticalLevel(s, limit, sink, errorCode);
   1349     }
   1350     static const char terminator = 0;  // TERMINATOR_BYTE
   1351     sink.Append(&terminator, 1);
   1352 }
   1353 
   1354 void
   1355 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
   1356                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1357     // NFD quick check
   1358     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
   1359     if(U_FAILURE(errorCode)) { return; }
   1360     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
   1361     UChar32 prev = 0;
   1362     if(nfdQCYesLimit != s) {
   1363         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
   1364     }
   1365     // Is there non-NFD text?
   1366     int32_t destLengthEstimate;
   1367     if(limit != NULL) {
   1368         if(nfdQCYesLimit == limit) { return; }
   1369         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
   1370     } else {
   1371         // s is NUL-terminated
   1372         if(*nfdQCYesLimit == 0) { return; }
   1373         destLengthEstimate = -1;
   1374     }
   1375     UnicodeString nfd;
   1376     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
   1377     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
   1378 }
   1379 
   1380 namespace {
   1381 
   1382 /**
   1383  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
   1384  * with an instance of this callback class.
   1385  * When another level is about to be written, the callback
   1386  * records the level and the number of bytes that will be written until
   1387  * the sink (which is actually a FixedSortKeyByteSink) fills up.
   1388  *
   1389  * When internalNextSortKeyPart() is called again, it restarts with the last level
   1390  * and ignores as many bytes as were written previously for that level.
   1391  */
   1392 class PartLevelCallback : public CollationKeys::LevelCallback {
   1393 public:
   1394     PartLevelCallback(const SortKeyByteSink &s)
   1395             : sink(s), level(Collation::PRIMARY_LEVEL) {
   1396         levelCapacity = sink.GetRemainingCapacity();
   1397     }
   1398     virtual ~PartLevelCallback() {}
   1399     virtual UBool needToWrite(Collation::Level l) {
   1400         if(!sink.Overflowed()) {
   1401             // Remember a level that will be at least partially written.
   1402             level = l;
   1403             levelCapacity = sink.GetRemainingCapacity();
   1404             return TRUE;
   1405         } else {
   1406             return FALSE;
   1407         }
   1408     }
   1409     Collation::Level getLevel() const { return level; }
   1410     int32_t getLevelCapacity() const { return levelCapacity; }
   1411 
   1412 private:
   1413     const SortKeyByteSink &sink;
   1414     Collation::Level level;
   1415     int32_t levelCapacity;
   1416 };
   1417 
   1418 }  // namespace
   1419 
   1420 int32_t
   1421 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
   1422                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
   1423     if(U_FAILURE(errorCode)) { return 0; }
   1424     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
   1425         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1426         return 0;
   1427     }
   1428     if(count == 0) { return 0; }
   1429 
   1430     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
   1431     sink.IgnoreBytes((int32_t)state[1]);
   1432     iter->move(iter, 0, UITER_START);
   1433 
   1434     Collation::Level level = (Collation::Level)state[0];
   1435     if(level <= Collation::QUATERNARY_LEVEL) {
   1436         UBool numeric = settings->isNumeric();
   1437         PartLevelCallback callback(sink);
   1438         if(settings->dontCheckFCD()) {
   1439             UIterCollationIterator ci(data, numeric, *iter);
   1440             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1441                                                       sink, level, callback, FALSE, errorCode);
   1442         } else {
   1443             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
   1444             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1445                                                       sink, level, callback, FALSE, errorCode);
   1446         }
   1447         if(U_FAILURE(errorCode)) { return 0; }
   1448         if(sink.NumberOfBytesAppended() > count) {
   1449             state[0] = (uint32_t)callback.getLevel();
   1450             state[1] = (uint32_t)callback.getLevelCapacity();
   1451             return count;
   1452         }
   1453         // All of the normal levels are done.
   1454         if(settings->getStrength() == UCOL_IDENTICAL) {
   1455             level = Collation::IDENTICAL_LEVEL;
   1456             iter->move(iter, 0, UITER_START);
   1457         }
   1458         // else fall through to setting ZERO_LEVEL
   1459     }
   1460 
   1461     if(level == Collation::IDENTICAL_LEVEL) {
   1462         int32_t levelCapacity = sink.GetRemainingCapacity();
   1463         UnicodeString s;
   1464         for(;;) {
   1465             UChar32 c = iter->next(iter);
   1466             if(c < 0) { break; }
   1467             s.append((UChar)c);
   1468         }
   1469         const UChar *sArray = s.getBuffer();
   1470         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
   1471         if(U_FAILURE(errorCode)) { return 0; }
   1472         if(sink.NumberOfBytesAppended() > count) {
   1473             state[0] = (uint32_t)level;
   1474             state[1] = (uint32_t)levelCapacity;
   1475             return count;
   1476         }
   1477     }
   1478 
   1479     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
   1480     state[0] = (uint32_t)Collation::ZERO_LEVEL;
   1481     state[1] = 0;
   1482     int32_t length = sink.NumberOfBytesAppended();
   1483     int32_t i = length;
   1484     while(i < count) { dest[i++] = 0; }
   1485     return length;
   1486 }
   1487 
   1488 void
   1489 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
   1490                                   UErrorCode &errorCode) const {
   1491     if(U_FAILURE(errorCode)) { return; }
   1492     const UChar *s = str.getBuffer();
   1493     const UChar *limit = s + str.length();
   1494     UBool numeric = settings->isNumeric();
   1495     if(settings->dontCheckFCD()) {
   1496         UTF16CollationIterator iter(data, numeric, s, s, limit);
   1497         int64_t ce;
   1498         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1499             ces.addElement(ce, errorCode);
   1500         }
   1501     } else {
   1502         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1503         int64_t ce;
   1504         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1505             ces.addElement(ce, errorCode);
   1506         }
   1507     }
   1508 }
   1509 
   1510 namespace {
   1511 
   1512 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
   1513                   UErrorCode &errorCode) {
   1514     if(U_FAILURE(errorCode) || length == 0) { return; }
   1515     if(!s.isEmpty()) {
   1516         s.append('_', errorCode);
   1517     }
   1518     s.append(letter, errorCode);
   1519     for(int32_t i = 0; i < length; ++i) {
   1520         s.append(uprv_toupper(subtag[i]), errorCode);
   1521     }
   1522 }
   1523 
   1524 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
   1525                      UErrorCode &errorCode) {
   1526     if(U_FAILURE(errorCode)) { return; }
   1527     if(!s.isEmpty()) {
   1528         s.append('_', errorCode);
   1529     }
   1530     static const char *valueChars = "1234...........IXO..SN..LU......";
   1531     s.append(letter, errorCode);
   1532     s.append(valueChars[value], errorCode);
   1533 }
   1534 
   1535 }  // namespace
   1536 
   1537 int32_t
   1538 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
   1539                                                     char *buffer, int32_t capacity,
   1540                                                     UErrorCode &errorCode) const {
   1541     if(U_FAILURE(errorCode)) { return 0; }
   1542     if(buffer == NULL ? capacity != 0 : capacity < 0) {
   1543         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1544         return 0;
   1545     }
   1546     if(locale == NULL) {
   1547         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
   1548     }
   1549 
   1550     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
   1551     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
   1552                                                   "collation", locale,
   1553                                                   NULL, &errorCode);
   1554     if(U_FAILURE(errorCode)) { return 0; }
   1555     if(length == 0) {
   1556         uprv_strcpy(resultLocale, "root");
   1557     } else {
   1558         resultLocale[length] = 0;
   1559     }
   1560 
   1561     // Append items in alphabetic order of their short definition letters.
   1562     CharString result;
   1563     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1564 
   1565     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
   1566         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
   1567     }
   1568     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
   1569     // See ICU tickets #10372 and #10386.
   1570     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
   1571         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
   1572     }
   1573     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
   1574         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
   1575     }
   1576     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
   1577         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
   1578     }
   1579     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
   1580         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
   1581     }
   1582     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
   1583     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1584     appendSubtag(result, 'K', subtag, length, errorCode);
   1585     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1586     appendSubtag(result, 'L', subtag, length, errorCode);
   1587     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
   1588         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
   1589     }
   1590     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1591     appendSubtag(result, 'R', subtag, length, errorCode);
   1592     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
   1593         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
   1594     }
   1595     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1596     appendSubtag(result, 'V', subtag, length, errorCode);
   1597     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1598     appendSubtag(result, 'Z', subtag, length, errorCode);
   1599 
   1600     if(U_FAILURE(errorCode)) { return 0; }
   1601     if(result.length() <= capacity) {
   1602         uprv_memcpy(buffer, result.data(), result.length());
   1603     }
   1604     return u_terminateChars(buffer, capacity, result.length(), &errorCode);
   1605 }
   1606 
   1607 UBool
   1608 RuleBasedCollator::isUnsafe(UChar32 c) const {
   1609     return data->isUnsafeBackward(c, settings->isNumeric());
   1610 }
   1611 
   1612 void
   1613 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
   1614     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
   1615 }
   1616 
   1617 UBool
   1618 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
   1619     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
   1620     return U_SUCCESS(errorCode);
   1621 }
   1622 
   1623 CollationElementIterator *
   1624 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
   1625     UErrorCode errorCode = U_ZERO_ERROR;
   1626     if(!initMaxExpansions(errorCode)) { return NULL; }
   1627     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1628     if(U_FAILURE(errorCode)) {
   1629         delete cei;
   1630         return NULL;
   1631     }
   1632     return cei;
   1633 }
   1634 
   1635 CollationElementIterator *
   1636 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
   1637     UErrorCode errorCode = U_ZERO_ERROR;
   1638     if(!initMaxExpansions(errorCode)) { return NULL; }
   1639     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1640     if(U_FAILURE(errorCode)) {
   1641         delete cei;
   1642         return NULL;
   1643     }
   1644     return cei;
   1645 }
   1646 
   1647 int32_t
   1648 RuleBasedCollator::getMaxExpansion(int32_t order) const {
   1649     UErrorCode errorCode = U_ZERO_ERROR;
   1650     (void)initMaxExpansions(errorCode);
   1651     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
   1652 }
   1653 
   1654 U_NAMESPACE_END
   1655 
   1656 #endif  // !UCONFIG_NO_COLLATION
   1657