Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1996-2015, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * rulebasedcollator.cpp
      9 *
     10 * (replaced the former tblcoll.cpp)
     11 *
     12 * created on: 2012feb14 with new and old collation code
     13 * created by: Markus W. Scherer
     14 */
     15 
     16 #include "unicode/utypes.h"
     17 
     18 #if !UCONFIG_NO_COLLATION
     19 
     20 #include "unicode/coll.h"
     21 #include "unicode/coleitr.h"
     22 #include "unicode/localpointer.h"
     23 #include "unicode/locid.h"
     24 #include "unicode/sortkey.h"
     25 #include "unicode/tblcoll.h"
     26 #include "unicode/ucol.h"
     27 #include "unicode/uiter.h"
     28 #include "unicode/uloc.h"
     29 #include "unicode/uniset.h"
     30 #include "unicode/unistr.h"
     31 #include "unicode/usetiter.h"
     32 #include "unicode/utf8.h"
     33 #include "unicode/uversion.h"
     34 #include "bocsu.h"
     35 #include "charstr.h"
     36 #include "cmemory.h"
     37 #include "collation.h"
     38 #include "collationcompare.h"
     39 #include "collationdata.h"
     40 #include "collationdatareader.h"
     41 #include "collationfastlatin.h"
     42 #include "collationiterator.h"
     43 #include "collationkeys.h"
     44 #include "collationroot.h"
     45 #include "collationsets.h"
     46 #include "collationsettings.h"
     47 #include "collationtailoring.h"
     48 #include "cstring.h"
     49 #include "uassert.h"
     50 #include "ucol_imp.h"
     51 #include "uhash.h"
     52 #include "uitercollationiterator.h"
     53 #include "ustr_imp.h"
     54 #include "utf16collationiterator.h"
     55 #include "utf8collationiterator.h"
     56 #include "uvectr64.h"
     57 
     58 U_NAMESPACE_BEGIN
     59 
     60 namespace {
     61 
     62 class FixedSortKeyByteSink : public SortKeyByteSink {
     63 public:
     64     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
     65             : SortKeyByteSink(dest, destCapacity) {}
     66     virtual ~FixedSortKeyByteSink();
     67 
     68 private:
     69     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
     70     virtual UBool Resize(int32_t appendCapacity, int32_t length);
     71 };
     72 
     73 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
     74 
     75 void
     76 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
     77     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
     78     // Fill the buffer completely.
     79     int32_t available = capacity_ - length;
     80     if (available > 0) {
     81         uprv_memcpy(buffer_ + length, bytes, available);
     82     }
     83 }
     84 
     85 UBool
     86 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
     87     return FALSE;
     88 }
     89 
     90 }  // namespace
     91 
     92 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
     93 class CollationKeyByteSink : public SortKeyByteSink {
     94 public:
     95     CollationKeyByteSink(CollationKey &key)
     96             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
     97               key_(key) {}
     98     virtual ~CollationKeyByteSink();
     99 
    100 private:
    101     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
    102     virtual UBool Resize(int32_t appendCapacity, int32_t length);
    103 
    104     CollationKey &key_;
    105 };
    106 
    107 CollationKeyByteSink::~CollationKeyByteSink() {}
    108 
    109 void
    110 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
    111     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
    112     if (Resize(n, length)) {
    113         uprv_memcpy(buffer_ + length, bytes, n);
    114     }
    115 }
    116 
    117 UBool
    118 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
    119     if (buffer_ == NULL) {
    120         return FALSE;  // allocation failed before already
    121     }
    122     int32_t newCapacity = 2 * capacity_;
    123     int32_t altCapacity = length + 2 * appendCapacity;
    124     if (newCapacity < altCapacity) {
    125         newCapacity = altCapacity;
    126     }
    127     if (newCapacity < 200) {
    128         newCapacity = 200;
    129     }
    130     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
    131     if (newBuffer == NULL) {
    132         SetNotOk();
    133         return FALSE;
    134     }
    135     buffer_ = reinterpret_cast<char *>(newBuffer);
    136     capacity_ = newCapacity;
    137     return TRUE;
    138 }
    139 
    140 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
    141         : Collator(other),
    142           data(other.data),
    143           settings(other.settings),
    144           tailoring(other.tailoring),
    145           cacheEntry(other.cacheEntry),
    146           validLocale(other.validLocale),
    147           explicitlySetAttributes(other.explicitlySetAttributes),
    148           actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    149     settings->addRef();
    150     cacheEntry->addRef();
    151 }
    152 
    153 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
    154                                      const RuleBasedCollator *base, UErrorCode &errorCode)
    155         : data(NULL),
    156           settings(NULL),
    157           tailoring(NULL),
    158           cacheEntry(NULL),
    159           validLocale(""),
    160           explicitlySetAttributes(0),
    161           actualLocaleIsSameAsValid(FALSE) {
    162     if(U_FAILURE(errorCode)) { return; }
    163     if(bin == NULL || length == 0 || base == NULL) {
    164         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    165         return;
    166     }
    167     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    168     if(U_FAILURE(errorCode)) { return; }
    169     if(base->tailoring != root) {
    170         errorCode = U_UNSUPPORTED_ERROR;
    171         return;
    172     }
    173     LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
    174     if(t.isNull() || t->isBogus()) {
    175         errorCode = U_MEMORY_ALLOCATION_ERROR;
    176         return;
    177     }
    178     CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
    179     if(U_FAILURE(errorCode)) { return; }
    180     t->actualLocale.setToBogus();
    181     adoptTailoring(t.orphan(), errorCode);
    182 }
    183 
    184 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
    185         : data(entry->tailoring->data),
    186           settings(entry->tailoring->settings),
    187           tailoring(entry->tailoring),
    188           cacheEntry(entry),
    189           validLocale(entry->validLocale),
    190           explicitlySetAttributes(0),
    191           actualLocaleIsSameAsValid(FALSE) {
    192     settings->addRef();
    193     cacheEntry->addRef();
    194 }
    195 
    196 RuleBasedCollator::~RuleBasedCollator() {
    197     SharedObject::clearPtr(settings);
    198     SharedObject::clearPtr(cacheEntry);
    199 }
    200 
    201 void
    202 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
    203     if(U_FAILURE(errorCode)) {
    204         t->deleteIfZeroRefCount();
    205         return;
    206     }
    207     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
    208     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
    209     if(cacheEntry == NULL) {
    210         errorCode = U_MEMORY_ALLOCATION_ERROR;
    211         t->deleteIfZeroRefCount();
    212         return;
    213     }
    214     data = t->data;
    215     settings = t->settings;
    216     settings->addRef();
    217     tailoring = t;
    218     cacheEntry->addRef();
    219     validLocale = t->actualLocale;
    220     actualLocaleIsSameAsValid = FALSE;
    221 }
    222 
    223 Collator *
    224 RuleBasedCollator::clone() const {
    225     return new RuleBasedCollator(*this);
    226 }
    227 
    228 RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    229     if(this == &other) { return *this; }
    230     SharedObject::copyPtr(other.settings, settings);
    231     tailoring = other.tailoring;
    232     SharedObject::copyPtr(other.cacheEntry, cacheEntry);
    233     data = tailoring->data;
    234     validLocale = other.validLocale;
    235     explicitlySetAttributes = other.explicitlySetAttributes;
    236     actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    237     return *this;
    238 }
    239 
    240 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
    241 
    242 UBool
    243 RuleBasedCollator::operator==(const Collator& other) const {
    244     if(this == &other) { return TRUE; }
    245     if(!Collator::operator==(other)) { return FALSE; }
    246     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
    247     if(*settings != *o.settings) { return FALSE; }
    248     if(data == o.data) { return TRUE; }
    249     UBool thisIsRoot = data->base == NULL;
    250     UBool otherIsRoot = o.data->base == NULL;
    251     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
    252     if(thisIsRoot != otherIsRoot) { return FALSE; }
    253     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
    254             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
    255         // Shortcut: If both collators have valid rule strings, then compare those.
    256         if(tailoring->rules == o.tailoring->rules) { return TRUE; }
    257     }
    258     // Different rule strings can result in the same or equivalent tailoring.
    259     // The rule strings are optional in ICU resource bundles, although included by default.
    260     // cloneBinary() drops the rule string.
    261     UErrorCode errorCode = U_ZERO_ERROR;
    262     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
    263     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
    264     if(U_FAILURE(errorCode)) { return FALSE; }
    265     if(*thisTailored != *otherTailored) { return FALSE; }
    266     // For completeness, we should compare all of the mappings;
    267     // or we should create a list of strings, sort it with one collator,
    268     // and check if both collators compare adjacent strings the same
    269     // (order & strength, down to quaternary); or similar.
    270     // Testing equality of collators seems unusual.
    271     return TRUE;
    272 }
    273 
    274 int32_t
    275 RuleBasedCollator::hashCode() const {
    276     int32_t h = settings->hashCode();
    277     if(data->base == NULL) { return h; }  // root collator
    278     // Do not rely on the rule string, see comments in operator==().
    279     UErrorCode errorCode = U_ZERO_ERROR;
    280     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
    281     if(U_FAILURE(errorCode)) { return 0; }
    282     UnicodeSetIterator iter(*set);
    283     while(iter.next() && !iter.isString()) {
    284         h ^= data->getCE32(iter.getCodepoint());
    285     }
    286     return h;
    287 }
    288 
    289 void
    290 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
    291                               const Locale &actual) {
    292     if(actual == tailoring->actualLocale) {
    293         actualLocaleIsSameAsValid = FALSE;
    294     } else {
    295         U_ASSERT(actual == valid);
    296         actualLocaleIsSameAsValid = TRUE;
    297     }
    298     // Do not modify tailoring.actualLocale:
    299     // We cannot be sure that that would be thread-safe.
    300     validLocale = valid;
    301     (void)requested;  // Ignore, see also ticket #10477.
    302 }
    303 
    304 Locale
    305 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
    306     if(U_FAILURE(errorCode)) {
    307         return Locale::getRoot();
    308     }
    309     switch(type) {
    310     case ULOC_ACTUAL_LOCALE:
    311         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
    312     case ULOC_VALID_LOCALE:
    313         return validLocale;
    314     case ULOC_REQUESTED_LOCALE:
    315     default:
    316         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    317         return Locale::getRoot();
    318     }
    319 }
    320 
    321 const char *
    322 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
    323     if(U_FAILURE(errorCode)) {
    324         return NULL;
    325     }
    326     const Locale *result;
    327     switch(type) {
    328     case ULOC_ACTUAL_LOCALE:
    329         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
    330         break;
    331     case ULOC_VALID_LOCALE:
    332         result = &validLocale;
    333         break;
    334     case ULOC_REQUESTED_LOCALE:
    335     default:
    336         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    337         return NULL;
    338     }
    339     if(result->isBogus()) { return NULL; }
    340     const char *id = result->getName();
    341     return id[0] == 0 ? "root" : id;
    342 }
    343 
    344 const UnicodeString&
    345 RuleBasedCollator::getRules() const {
    346     return tailoring->rules;
    347 }
    348 
    349 void
    350 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
    351     if(delta == UCOL_TAILORING_ONLY) {
    352         buffer = tailoring->rules;
    353         return;
    354     }
    355     // UCOL_FULL_RULES
    356     buffer.remove();
    357     CollationLoader::appendRootRules(buffer);
    358     buffer.append(tailoring->rules).getTerminatedBuffer();
    359 }
    360 
    361 void
    362 RuleBasedCollator::getVersion(UVersionInfo version) const {
    363     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
    364     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
    365 }
    366 
    367 UnicodeSet *
    368 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
    369     if(U_FAILURE(errorCode)) { return NULL; }
    370     UnicodeSet *tailored = new UnicodeSet();
    371     if(tailored == NULL) {
    372         errorCode = U_MEMORY_ALLOCATION_ERROR;
    373         return NULL;
    374     }
    375     if(data->base != NULL) {
    376         TailoredSet(tailored).forData(data, errorCode);
    377         if(U_FAILURE(errorCode)) {
    378             delete tailored;
    379             return NULL;
    380         }
    381     }
    382     return tailored;
    383 }
    384 
    385 void
    386 RuleBasedCollator::internalGetContractionsAndExpansions(
    387         UnicodeSet *contractions, UnicodeSet *expansions,
    388         UBool addPrefixes, UErrorCode &errorCode) const {
    389     if(U_FAILURE(errorCode)) { return; }
    390     if(contractions != NULL) {
    391         contractions->clear();
    392     }
    393     if(expansions != NULL) {
    394         expansions->clear();
    395     }
    396     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
    397 }
    398 
    399 void
    400 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
    401     if(U_FAILURE(errorCode)) { return; }
    402     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
    403 }
    404 
    405 const CollationSettings &
    406 RuleBasedCollator::getDefaultSettings() const {
    407     return *tailoring->settings;
    408 }
    409 
    410 UColAttributeValue
    411 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
    412     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
    413     int32_t option;
    414     switch(attr) {
    415     case UCOL_FRENCH_COLLATION:
    416         option = CollationSettings::BACKWARD_SECONDARY;
    417         break;
    418     case UCOL_ALTERNATE_HANDLING:
    419         return settings->getAlternateHandling();
    420     case UCOL_CASE_FIRST:
    421         return settings->getCaseFirst();
    422     case UCOL_CASE_LEVEL:
    423         option = CollationSettings::CASE_LEVEL;
    424         break;
    425     case UCOL_NORMALIZATION_MODE:
    426         option = CollationSettings::CHECK_FCD;
    427         break;
    428     case UCOL_STRENGTH:
    429         return (UColAttributeValue)settings->getStrength();
    430     case UCOL_HIRAGANA_QUATERNARY_MODE:
    431         // Deprecated attribute, unsettable.
    432         return UCOL_OFF;
    433     case UCOL_NUMERIC_COLLATION:
    434         option = CollationSettings::NUMERIC;
    435         break;
    436     default:
    437         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    438         return UCOL_DEFAULT;
    439     }
    440     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
    441 }
    442 
    443 void
    444 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
    445                                 UErrorCode &errorCode) {
    446     UColAttributeValue oldValue = getAttribute(attr, errorCode);
    447     if(U_FAILURE(errorCode)) { return; }
    448     if(value == oldValue) {
    449         setAttributeExplicitly(attr);
    450         return;
    451     }
    452     const CollationSettings &defaultSettings = getDefaultSettings();
    453     if(settings == &defaultSettings) {
    454         if(value == UCOL_DEFAULT) {
    455             setAttributeDefault(attr);
    456             return;
    457         }
    458     }
    459     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    460     if(ownedSettings == NULL) {
    461         errorCode = U_MEMORY_ALLOCATION_ERROR;
    462         return;
    463     }
    464 
    465     switch(attr) {
    466     case UCOL_FRENCH_COLLATION:
    467         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
    468                                defaultSettings.options, errorCode);
    469         break;
    470     case UCOL_ALTERNATE_HANDLING:
    471         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
    472         break;
    473     case UCOL_CASE_FIRST:
    474         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
    475         break;
    476     case UCOL_CASE_LEVEL:
    477         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
    478                                defaultSettings.options, errorCode);
    479         break;
    480     case UCOL_NORMALIZATION_MODE:
    481         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
    482                                defaultSettings.options, errorCode);
    483         break;
    484     case UCOL_STRENGTH:
    485         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
    486         break;
    487     case UCOL_HIRAGANA_QUATERNARY_MODE:
    488         // Deprecated attribute. Check for valid values but do not change anything.
    489         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
    490             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    491         }
    492         break;
    493     case UCOL_NUMERIC_COLLATION:
    494         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
    495         break;
    496     default:
    497         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    498         break;
    499     }
    500     if(U_FAILURE(errorCode)) { return; }
    501     setFastLatinOptions(*ownedSettings);
    502     if(value == UCOL_DEFAULT) {
    503         setAttributeDefault(attr);
    504     } else {
    505         setAttributeExplicitly(attr);
    506     }
    507 }
    508 
    509 Collator &
    510 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
    511     if(U_FAILURE(errorCode)) { return *this; }
    512     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    513     int32_t value;
    514     if(group == UCOL_REORDER_CODE_DEFAULT) {
    515         value = UCOL_DEFAULT;
    516     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
    517         value = group - UCOL_REORDER_CODE_FIRST;
    518     } else {
    519         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    520         return *this;
    521     }
    522     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
    523     if(value == oldValue) {
    524         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    525         return *this;
    526     }
    527     const CollationSettings &defaultSettings = getDefaultSettings();
    528     if(settings == &defaultSettings) {
    529         if(value == UCOL_DEFAULT) {
    530             setAttributeDefault(ATTR_VARIABLE_TOP);
    531             return *this;
    532         }
    533     }
    534     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    535     if(ownedSettings == NULL) {
    536         errorCode = U_MEMORY_ALLOCATION_ERROR;
    537         return *this;
    538     }
    539 
    540     if(group == UCOL_REORDER_CODE_DEFAULT) {
    541         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
    542     }
    543     uint32_t varTop = data->getLastPrimaryForGroup(group);
    544     U_ASSERT(varTop != 0);
    545     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
    546     if(U_FAILURE(errorCode)) { return *this; }
    547     ownedSettings->variableTop = varTop;
    548     setFastLatinOptions(*ownedSettings);
    549     if(value == UCOL_DEFAULT) {
    550         setAttributeDefault(ATTR_VARIABLE_TOP);
    551     } else {
    552         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    553     }
    554     return *this;
    555 }
    556 
    557 UColReorderCode
    558 RuleBasedCollator::getMaxVariable() const {
    559     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
    560 }
    561 
    562 uint32_t
    563 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
    564     return settings->variableTop;
    565 }
    566 
    567 uint32_t
    568 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
    569     if(U_FAILURE(errorCode)) { return 0; }
    570     if(varTop == NULL && len !=0) {
    571         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    572         return 0;
    573     }
    574     if(len < 0) { len = u_strlen(varTop); }
    575     if(len == 0) {
    576         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    577         return 0;
    578     }
    579     UBool numeric = settings->isNumeric();
    580     int64_t ce1, ce2;
    581     if(settings->dontCheckFCD()) {
    582         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    583         ce1 = ci.nextCE(errorCode);
    584         ce2 = ci.nextCE(errorCode);
    585     } else {
    586         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    587         ce1 = ci.nextCE(errorCode);
    588         ce2 = ci.nextCE(errorCode);
    589     }
    590     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
    591         errorCode = U_CE_NOT_FOUND_ERROR;
    592         return 0;
    593     }
    594     setVariableTop((uint32_t)(ce1 >> 32), errorCode);
    595     return settings->variableTop;
    596 }
    597 
    598 uint32_t
    599 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
    600     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
    601 }
    602 
    603 void
    604 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
    605     if(U_FAILURE(errorCode)) { return; }
    606     if(varTop != settings->variableTop) {
    607         // Pin the variable top to the end of the reordering group which contains it.
    608         // Only a few special groups are supported.
    609         int32_t group = data->getGroupForPrimary(varTop);
    610         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
    611             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    612             return;
    613         }
    614         uint32_t v = data->getLastPrimaryForGroup(group);
    615         U_ASSERT(v != 0 && v >= varTop);
    616         varTop = v;
    617         if(varTop != settings->variableTop) {
    618             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    619             if(ownedSettings == NULL) {
    620                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    621                 return;
    622             }
    623             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
    624                                           getDefaultSettings().options, errorCode);
    625             if(U_FAILURE(errorCode)) { return; }
    626             ownedSettings->variableTop = varTop;
    627             setFastLatinOptions(*ownedSettings);
    628         }
    629     }
    630     if(varTop == getDefaultSettings().variableTop) {
    631         setAttributeDefault(ATTR_VARIABLE_TOP);
    632     } else {
    633         setAttributeExplicitly(ATTR_VARIABLE_TOP);
    634     }
    635 }
    636 
    637 int32_t
    638 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
    639                                    UErrorCode &errorCode) const {
    640     if(U_FAILURE(errorCode)) { return 0; }
    641     if(capacity < 0 || (dest == NULL && capacity > 0)) {
    642         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    643         return 0;
    644     }
    645     int32_t length = settings->reorderCodesLength;
    646     if(length == 0) { return 0; }
    647     if(length > capacity) {
    648         errorCode = U_BUFFER_OVERFLOW_ERROR;
    649         return length;
    650     }
    651     uprv_memcpy(dest, settings->reorderCodes, length * 4);
    652     return length;
    653 }
    654 
    655 void
    656 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
    657                                    UErrorCode &errorCode) {
    658     if(U_FAILURE(errorCode)) { return; }
    659     if(length < 0 || (reorderCodes == NULL && length > 0)) {
    660         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    661         return;
    662     }
    663     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
    664         length = 0;
    665     }
    666     if(length == settings->reorderCodesLength &&
    667             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
    668         return;
    669     }
    670     const CollationSettings &defaultSettings = getDefaultSettings();
    671     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
    672         if(settings != &defaultSettings) {
    673             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    674             if(ownedSettings == NULL) {
    675                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    676                 return;
    677             }
    678             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
    679             setFastLatinOptions(*ownedSettings);
    680         }
    681         return;
    682     }
    683     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    684     if(ownedSettings == NULL) {
    685         errorCode = U_MEMORY_ALLOCATION_ERROR;
    686         return;
    687     }
    688     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
    689     setFastLatinOptions(*ownedSettings);
    690 }
    691 
    692 void
    693 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
    694     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
    695             data, ownedSettings,
    696             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
    697 }
    698 
    699 UCollationResult
    700 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    701                            UErrorCode &errorCode) const {
    702     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    703     return doCompare(left.getBuffer(), left.length(),
    704                      right.getBuffer(), right.length(), errorCode);
    705 }
    706 
    707 UCollationResult
    708 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    709                            int32_t length, UErrorCode &errorCode) const {
    710     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
    711     if(length < 0) {
    712         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    713         return UCOL_EQUAL;
    714     }
    715     int32_t leftLength = left.length();
    716     int32_t rightLength = right.length();
    717     if(leftLength > length) { leftLength = length; }
    718     if(rightLength > length) { rightLength = length; }
    719     return doCompare(left.getBuffer(), leftLength,
    720                      right.getBuffer(), rightLength, errorCode);
    721 }
    722 
    723 UCollationResult
    724 RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
    725                            const UChar *right, int32_t rightLength,
    726                            UErrorCode &errorCode) const {
    727     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    728     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
    729         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    730         return UCOL_EQUAL;
    731     }
    732     // Make sure both or neither strings have a known length.
    733     // We do not optimize for mixed length/termination.
    734     if(leftLength >= 0) {
    735         if(rightLength < 0) { rightLength = u_strlen(right); }
    736     } else {
    737         if(rightLength >= 0) { leftLength = u_strlen(left); }
    738     }
    739     return doCompare(left, leftLength, right, rightLength, errorCode);
    740 }
    741 
    742 UCollationResult
    743 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
    744                                UErrorCode &errorCode) const {
    745     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    746     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
    747     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
    748     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
    749         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    750         return UCOL_EQUAL;
    751     }
    752     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
    753 }
    754 
    755 UCollationResult
    756 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
    757                                        const char *right, int32_t rightLength,
    758                                        UErrorCode &errorCode) const {
    759     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    760     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
    761         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    762         return UCOL_EQUAL;
    763     }
    764     // Make sure both or neither strings have a known length.
    765     // We do not optimize for mixed length/termination.
    766     if(leftLength >= 0) {
    767         if(rightLength < 0) { rightLength = uprv_strlen(right); }
    768     } else {
    769         if(rightLength >= 0) { leftLength = uprv_strlen(left); }
    770     }
    771     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
    772                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
    773 }
    774 
    775 namespace {
    776 
    777 /**
    778  * Abstract iterator for identical-level string comparisons.
    779  * Returns FCD code points and handles temporary switching to NFD.
    780  */
    781 class NFDIterator : public UObject {
    782 public:
    783     NFDIterator() : index(-1), length(0) {}
    784     virtual ~NFDIterator() {}
    785     /**
    786      * Returns the next code point from the internal normalization buffer,
    787      * or else the next text code point.
    788      * Returns -1 at the end of the text.
    789      */
    790     UChar32 nextCodePoint() {
    791         if(index >= 0) {
    792             if(index == length) {
    793                 index = -1;
    794             } else {
    795                 UChar32 c;
    796                 U16_NEXT_UNSAFE(decomp, index, c);
    797                 return c;
    798             }
    799         }
    800         return nextRawCodePoint();
    801     }
    802     /**
    803      * @param nfcImpl
    804      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
    805      * @return the first code point in c's decomposition,
    806      *         or c itself if it was decomposed already or if it does not decompose
    807      */
    808     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
    809         if(index >= 0) { return c; }
    810         decomp = nfcImpl.getDecomposition(c, buffer, length);
    811         if(decomp == NULL) { return c; }
    812         index = 0;
    813         U16_NEXT_UNSAFE(decomp, index, c);
    814         return c;
    815     }
    816 protected:
    817     /**
    818      * Returns the next text code point in FCD order.
    819      * Returns -1 at the end of the text.
    820      */
    821     virtual UChar32 nextRawCodePoint() = 0;
    822 private:
    823     const UChar *decomp;
    824     UChar buffer[4];
    825     int32_t index;
    826     int32_t length;
    827 };
    828 
    829 class UTF16NFDIterator : public NFDIterator {
    830 public:
    831     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
    832 protected:
    833     virtual UChar32 nextRawCodePoint() {
    834         if(s == limit) { return U_SENTINEL; }
    835         UChar32 c = *s++;
    836         if(limit == NULL && c == 0) {
    837             s = NULL;
    838             return U_SENTINEL;
    839         }
    840         UChar trail;
    841         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
    842             ++s;
    843             c = U16_GET_SUPPLEMENTARY(c, trail);
    844         }
    845         return c;
    846     }
    847 
    848     const UChar *s;
    849     const UChar *limit;
    850 };
    851 
    852 class FCDUTF16NFDIterator : public UTF16NFDIterator {
    853 public:
    854     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
    855             : UTF16NFDIterator(NULL, NULL) {
    856         UErrorCode errorCode = U_ZERO_ERROR;
    857         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
    858         if(U_FAILURE(errorCode)) { return; }
    859         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
    860             s = text;
    861             limit = spanLimit;
    862         } else {
    863             str.setTo(text, (int32_t)(spanLimit - text));
    864             {
    865                 ReorderingBuffer buffer(nfcImpl, str);
    866                 if(buffer.init(str.length(), errorCode)) {
    867                     nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
    868                 }
    869             }
    870             if(U_SUCCESS(errorCode)) {
    871                 s = str.getBuffer();
    872                 limit = s + str.length();
    873             }
    874         }
    875     }
    876 private:
    877     UnicodeString str;
    878 };
    879 
    880 class UTF8NFDIterator : public NFDIterator {
    881 public:
    882     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
    883         : s(text), pos(0), length(textLength) {}
    884 protected:
    885     virtual UChar32 nextRawCodePoint() {
    886         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
    887         UChar32 c;
    888         U8_NEXT_OR_FFFD(s, pos, length, c);
    889         return c;
    890     }
    891 
    892     const uint8_t *s;
    893     int32_t pos;
    894     int32_t length;
    895 };
    896 
    897 class FCDUTF8NFDIterator : public NFDIterator {
    898 public:
    899     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
    900             : u8ci(data, FALSE, text, 0, textLength) {}
    901 protected:
    902     virtual UChar32 nextRawCodePoint() {
    903         UErrorCode errorCode = U_ZERO_ERROR;
    904         return u8ci.nextCodePoint(errorCode);
    905     }
    906 private:
    907     FCDUTF8CollationIterator u8ci;
    908 };
    909 
    910 class UIterNFDIterator : public NFDIterator {
    911 public:
    912     UIterNFDIterator(UCharIterator &it) : iter(it) {}
    913 protected:
    914     virtual UChar32 nextRawCodePoint() {
    915         return uiter_next32(&iter);
    916     }
    917 private:
    918     UCharIterator &iter;
    919 };
    920 
    921 class FCDUIterNFDIterator : public NFDIterator {
    922 public:
    923     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
    924             : uici(data, FALSE, it, startIndex) {}
    925 protected:
    926     virtual UChar32 nextRawCodePoint() {
    927         UErrorCode errorCode = U_ZERO_ERROR;
    928         return uici.nextCodePoint(errorCode);
    929     }
    930 private:
    931     FCDUIterCollationIterator uici;
    932 };
    933 
    934 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
    935                                 NFDIterator &left, NFDIterator &right) {
    936     for(;;) {
    937         // Fetch the next FCD code point from each string.
    938         UChar32 leftCp = left.nextCodePoint();
    939         UChar32 rightCp = right.nextCodePoint();
    940         if(leftCp == rightCp) {
    941             if(leftCp < 0) { break; }
    942             continue;
    943         }
    944         // If they are different, then decompose each and compare again.
    945         if(leftCp < 0) {
    946             leftCp = -2;  // end of string
    947         } else if(leftCp == 0xfffe) {
    948             leftCp = -1;  // U+FFFE: merge separator
    949         } else {
    950             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
    951         }
    952         if(rightCp < 0) {
    953             rightCp = -2;  // end of string
    954         } else if(rightCp == 0xfffe) {
    955             rightCp = -1;  // U+FFFE: merge separator
    956         } else {
    957             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
    958         }
    959         if(leftCp < rightCp) { return UCOL_LESS; }
    960         if(leftCp > rightCp) { return UCOL_GREATER; }
    961     }
    962     return UCOL_EQUAL;
    963 }
    964 
    965 }  // namespace
    966 
     967 UCollationResult
     968 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
     969                              const UChar *right, int32_t rightLength,
     970                              UErrorCode &errorCode) const {
             // Compares two UTF-16 strings; this is the worker behind the compare() overloads.
             // Only leftLength is tested to tell NUL-terminated input (negative) from
             // counted input, so callers pass either two known lengths or two negative ones.
             // Stages: skip the identical prefix, try the fast-Latin comparison,
             // fall back to the CollationIterator comparison up to the quaternary level,
             // and, only for identical strength and a tie, compare the NFD code points.
             // Returns UCOL_LESS, UCOL_EQUAL or UCOL_GREATER.
     971     // U_FAILURE(errorCode) checked by caller.
             // Same buffer with the same length: nothing to compare.
     972     if(left == right && leftLength == rightLength) {
     973         return UCOL_EQUAL;
     974     }
     975 
     976     // Identical-prefix test.
             // Counts the leading code units that are the same in both strings.
     977     const UChar *leftLimit;
     978     const UChar *rightLimit;
     979     int32_t equalPrefixLength = 0;
     980     if(leftLength < 0) {
                 // NUL-terminated: the limits stay NULL
                 // (UTF16NFDIterator treats a NULL limit as "stop at NUL").
     981         leftLimit = NULL;
     982         rightLimit = NULL;
     983         UChar c;
     984         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
                     // Both strings reached their NUL together: they are identical.
     985             if(c == 0) { return UCOL_EQUAL; }
     986             ++equalPrefixLength;
     987         }
     988     } else {
     989         leftLimit = left + leftLength;
     990         rightLimit = right + rightLength;
                 // Stop at the end of either string or at the first differing code unit.
     991         for(;;) {
     992             if(equalPrefixLength == leftLength) {
                         // Both strings ended together: they are identical.
     993                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
     994                 break;
     995             } else if(equalPrefixLength == rightLength ||
     996                       left[equalPrefixLength] != right[equalPrefixLength]) {
     997                 break;
     998             }
     999             ++equalPrefixLength;
    1000         }
    1001     }
    1002 
    1003     UBool numeric = settings->isNumeric();
    1004     if(equalPrefixLength > 0) {
                 // Test the first differing unit of each string, unless that string ends here.
                 // For NUL-terminated input leftLength/rightLength are negative,
                 // so the "!=" tests are always true and the unit at the position is examined.
    1005         if((equalPrefixLength != leftLength &&
    1006                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
    1007                 (equalPrefixLength != rightLength &&
    1008                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
    1009             // Identical prefix: Back up to the start of a contraction or reordering sequence.
                     // Only left[] is consulted because both strings are the same inside the prefix.
    1010             while(--equalPrefixLength > 0 &&
    1011                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
    1012         }
    1013         // Notes:
    1014         // - A longer string can compare equal to a prefix of it if only ignorables follow.
    1015         // - With a backward level, a longer string can compare less-than a prefix of it.
    1016 
    1017         // Pass the actual start of each string into the CollationIterators,
    1018         // plus the equalPrefixLength position,
    1019         // so that prefix matches back into the equal prefix work.
    1020     }
    1021 
             // Fast path: attempted only if fastLatinOptions is non-negative and
             // the next code unit of each string (if any) is at most LATIN_MAX.
    1022     int32_t result;
    1023     int32_t fastLatinOptions = settings->fastLatinOptions;
    1024     if(fastLatinOptions >= 0 &&
    1025             (equalPrefixLength == leftLength ||
    1026                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
    1027             (equalPrefixLength == rightLength ||
    1028                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
    1029         if(leftLength >= 0) {
                     // Counted strings: pass the lengths that remain after the equal prefix.
    1030             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
    1031                                                       settings->fastLatinPrimaries,
    1032                                                       fastLatinOptions,
    1033                                                       left + equalPrefixLength,
    1034                                                       leftLength - equalPrefixLength,
    1035                                                       right + equalPrefixLength,
    1036                                                       rightLength - equalPrefixLength);
    1037         } else {
                     // NUL-terminated strings: pass -1 for both lengths.
    1038             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
    1039                                                       settings->fastLatinPrimaries,
    1040                                                       fastLatinOptions,
    1041                                                       left + equalPrefixLength, -1,
    1042                                                       right + equalPrefixLength, -1);
    1043         }
    1044     } else {
    1045         result = CollationFastLatin::BAIL_OUT_RESULT;
    1046     }
    1047 
             // The fast path was skipped or bailed out: run the full comparison.
             // Each iterator gets the string start, the position after the equal prefix,
             // and the limit. The FCD variants are used unless dontCheckFCD() is set.
    1048     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
    1049         if(settings->dontCheckFCD()) {
    1050             UTF16CollationIterator leftIter(data, numeric,
    1051                                             left, left + equalPrefixLength, leftLimit);
    1052             UTF16CollationIterator rightIter(data, numeric,
    1053                                             right, right + equalPrefixLength, rightLimit);
    1054             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1055         } else {
    1056             FCDUTF16CollationIterator leftIter(data, numeric,
    1057                                               left, left + equalPrefixLength, leftLimit);
    1058             FCDUTF16CollationIterator rightIter(data, numeric,
    1059                                                 right, right + equalPrefixLength, rightLimit);
    1060             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1061         }
    1062     }
             // Finished unless the strings tie so far, the strength is identical,
             // and no error occurred.
    1063     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
    1064         return (UCollationResult)result;
    1065     }
    1066 
    1067     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    1068     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    1069     // and the benefit seems unlikely to be measurable.
    1070 
    1071     // Compare identical level.
             // The equal prefix cannot contribute a difference, so start after it.
    1072     const Normalizer2Impl &nfcImpl = data->nfcImpl;
    1073     left += equalPrefixLength;
    1074     right += equalPrefixLength;
    1075     if(settings->dontCheckFCD()) {
    1076         UTF16NFDIterator leftIter(left, leftLimit);
    1077         UTF16NFDIterator rightIter(right, rightLimit);
    1078         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1079     } else {
    1080         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
    1081         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
    1082         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1083     }
    1084 }
   1085 
    1086 UCollationResult
    1087 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
    1088                              const uint8_t *right, int32_t rightLength,
    1089                              UErrorCode &errorCode) const {
             // Compares two UTF-8 strings; this is the worker behind compareUTF8()
             // and internalCompareUTF8().
             // Only leftLength is tested to tell NUL-terminated input (negative) from
             // counted input, so callers pass either two known lengths or two negative ones.
             // Stages: skip the identical byte prefix, back up to a code point boundary and,
             // if needed, further to a safe position, try the fast-Latin comparison,
             // fall back to the CollationIterator comparison up to the quaternary level,
             // and, only for identical strength and a tie, compare the NFD code points.
             // Returns UCOL_LESS, UCOL_EQUAL or UCOL_GREATER.
    1090     // U_FAILURE(errorCode) checked by caller.
             // Same buffer with the same length: nothing to compare.
    1091     if(left == right && leftLength == rightLength) {
    1092         return UCOL_EQUAL;
    1093     }
    1094 
    1095     // Identical-prefix test.
             // Counts the leading bytes that are the same in both strings.
    1096     int32_t equalPrefixLength = 0;
    1097     if(leftLength < 0) {
    1098         uint8_t c;
    1099         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
                     // Both strings reached their NUL together: they are identical.
    1100             if(c == 0) { return UCOL_EQUAL; }
    1101             ++equalPrefixLength;
    1102         }
    1103     } else {
                 // Stop at the end of either string or at the first differing byte.
    1104         for(;;) {
    1105             if(equalPrefixLength == leftLength) {
                         // Both strings ended together: they are identical.
    1106                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
    1107                 break;
    1108             } else if(equalPrefixLength == rightLength ||
    1109                       left[equalPrefixLength] != right[equalPrefixLength]) {
    1110                 break;
    1111             }
    1112             ++equalPrefixLength;
    1113         }
    1114     }
    1115     // Back up to the start of a partially-equal code point.
             // If either string continues with a trail byte, the byte-wise prefix ended
             // inside a multi-byte sequence; step back over the trail bytes in left
             // (the same bytes as in right within the prefix).
    1116     if(equalPrefixLength > 0 &&
    1117             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
    1118             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
    1119         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
    1120     }
    1121 
    1122     UBool numeric = settings->isNumeric();
    1123     if(equalPrefixLength > 0) {
                 // Decode the first differing code point of each string (unless that string
                 // ends here) using a scratch index i, and test whether it is unsafe.
                 // The right string is only examined if the left one was not already unsafe.
    1124         UBool unsafe = FALSE;
    1125         if(equalPrefixLength != leftLength) {
    1126             int32_t i = equalPrefixLength;
    1127             UChar32 c;
    1128             U8_NEXT_OR_FFFD(left, i, leftLength, c);
    1129             unsafe = data->isUnsafeBackward(c, numeric);
    1130         }
    1131         if(!unsafe && equalPrefixLength != rightLength) {
    1132             int32_t i = equalPrefixLength;
    1133             UChar32 c;
    1134             U8_NEXT_OR_FFFD(right, i, rightLength, c);
    1135             unsafe = data->isUnsafeBackward(c, numeric);
    1136         }
    1137         if(unsafe) {
    1138             // Identical prefix: Back up to the start of a contraction or reordering sequence.
                     // U8_PREV_OR_FFFD moves equalPrefixLength back by one code point per pass.
    1139             UChar32 c;
    1140             do {
    1141                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
    1142             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
    1143         }
    1144         // See the notes in the UTF-16 version.
    1145 
    1146         // Pass the actual start of each string into the CollationIterators,
    1147         // plus the equalPrefixLength position,
    1148         // so that prefix matches back into the equal prefix work.
    1149     }
    1150 
             // Fast path: attempted only if fastLatinOptions is non-negative and
             // the next byte of each string (if any) is at most LATIN_MAX_UTF8_LEAD.
    1151     int32_t result;
    1152     int32_t fastLatinOptions = settings->fastLatinOptions;
    1153     if(fastLatinOptions >= 0 &&
    1154             (equalPrefixLength == leftLength ||
    1155                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
    1156             (equalPrefixLength == rightLength ||
    1157                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
    1158         if(leftLength >= 0) {
                     // Counted strings: pass the lengths that remain after the equal prefix.
    1159             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
    1160                                                      settings->fastLatinPrimaries,
    1161                                                      fastLatinOptions,
    1162                                                      left + equalPrefixLength,
    1163                                                      leftLength - equalPrefixLength,
    1164                                                      right + equalPrefixLength,
    1165                                                      rightLength - equalPrefixLength);
    1166         } else {
                     // NUL-terminated strings: pass -1 for both lengths.
    1167             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
    1168                                                      settings->fastLatinPrimaries,
    1169                                                      fastLatinOptions,
    1170                                                      left + equalPrefixLength, -1,
    1171                                                      right + equalPrefixLength, -1);
    1172         }
    1173     } else {
    1174         result = CollationFastLatin::BAIL_OUT_RESULT;
    1175     }
    1176 
             // The fast path was skipped or bailed out: run the full comparison.
             // Each iterator gets the string start, the offset after the equal prefix,
             // and the original length. The FCD variants are used unless dontCheckFCD() is set.
    1177     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
    1178         if(settings->dontCheckFCD()) {
    1179             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
    1180             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
    1181             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1182         } else {
    1183             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
    1184             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
    1185             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1186         }
    1187     }
             // Finished unless the strings tie so far, the strength is identical,
             // and no error occurred.
    1188     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
    1189         return (UCollationResult)result;
    1190     }
    1191 
    1192     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    1193     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    1194     // and the benefit seems unlikely to be measurable.
    1195 
    1196     // Compare identical level.
             // The equal prefix cannot contribute a difference, so start after it.
    1197     const Normalizer2Impl &nfcImpl = data->nfcImpl;
    1198     left += equalPrefixLength;
    1199     right += equalPrefixLength;
             // Shorten the lengths to match the advanced pointers. Negative
             // (NUL-terminated) lengths stay negative, and with leftLength == 0
             // the prefix loop above leaves equalPrefixLength at 0, so nothing is subtracted.
    1200     if(leftLength > 0) {
    1201         leftLength -= equalPrefixLength;
    1202         rightLength -= equalPrefixLength;
    1203     }
    1204     if(settings->dontCheckFCD()) {
    1205         UTF8NFDIterator leftIter(left, leftLength);
    1206         UTF8NFDIterator rightIter(right, rightLength);
    1207         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1208     } else {
    1209         FCDUTF8NFDIterator leftIter(data, left, leftLength);
    1210         FCDUTF8NFDIterator rightIter(data, right, rightLength);
    1211         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1212     }
    1213 }
   1214 
    1215 UCollationResult
    1216 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
    1217                            UErrorCode &errorCode) const {
             // Compares the text delivered by two UCharIterators, reading from their
             // current positions; both iterators are moved by this function.
             // Returns UCOL_EQUAL right away if errorCode already indicates a failure
             // or if both arguments are the same iterator object.
             // Stages: skip the identical prefix, back up to a safe position if needed,
             // compare up to the quaternary level, and, only for identical strength
             // and a tie, compare the NFD code points.
    1218     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
    1219     UBool numeric = settings->isNumeric();
    1220 
    1221     // Identical-prefix test.
             // Reads code units from both iterators in step and counts the equal ones.
    1222     int32_t equalPrefixLength = 0;
    1223     {
    1224         UChar32 leftUnit;
    1225         UChar32 rightUnit;
    1226         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
                     // Equal negative values: both iterators are exhausted, the texts are identical.
    1227             if(leftUnit < 0) { return UCOL_EQUAL; }
    1228             ++equalPrefixLength;
    1229         }
    1230 
    1231         // Back out the code units that differed, for the real collation comparison.
                 // A negative unit means nothing was read from that iterator, so it is not moved back.
    1232         if(leftUnit >= 0) { left.previous(&left); }
    1233         if(rightUnit >= 0) { right.previous(&right); }
    1234 
    1235         if(equalPrefixLength > 0) {
    1236             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
    1237                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
    1238                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
                         // Both iterators step back together; only the left unit is tested
                         // because both texts are the same inside the prefix.
    1239                 do {
    1240                     --equalPrefixLength;
    1241                     leftUnit = left.previous(&left);
    1242                     right.previous(&right);
    1243                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
    1244             }
    1245             // See the notes in the UTF-16 version.
    1246         }
    1247     }
    1248 
             // Compare from the current iterator positions up to the quaternary level.
             // The FCD variants are used unless dontCheckFCD() is set; they additionally
             // receive equalPrefixLength as their start index.
    1249     UCollationResult result;
    1250     if(settings->dontCheckFCD()) {
    1251         UIterCollationIterator leftIter(data, numeric, left);
    1252         UIterCollationIterator rightIter(data, numeric, right);
    1253         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1254     } else {
    1255         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
    1256         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
    1257         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1258     }
             // Finished unless the texts tie so far, the strength is identical,
             // and no error occurred.
    1259     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
    1260         return result;
    1261     }
    1262 
    1263     // Compare identical level.
             // Reposition both iterators to the end of the equal prefix.
             // NOTE(review): assumes UITER_ZERO makes the offset absolute -- confirm against uiter.h.
    1264     left.move(&left, equalPrefixLength, UITER_ZERO);
    1265     right.move(&right, equalPrefixLength, UITER_ZERO);
    1266     const Normalizer2Impl &nfcImpl = data->nfcImpl;
    1267     if(settings->dontCheckFCD()) {
    1268         UIterNFDIterator leftIter(left);
    1269         UIterNFDIterator rightIter(right);
    1270         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1271     } else {
    1272         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
    1273         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
    1274         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1275     }
    1276 }
   1277 
   1278 CollationKey &
   1279 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
   1280                                    UErrorCode &errorCode) const {
   1281     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
   1282 }
   1283 
   1284 CollationKey &
   1285 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
   1286                                    UErrorCode &errorCode) const {
   1287     if(U_FAILURE(errorCode)) {
   1288         return key.setToBogus();
   1289     }
   1290     if(s == NULL && length != 0) {
   1291         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1292         return key.setToBogus();
   1293     }
   1294     key.reset();  // resets the "bogus" state
   1295     CollationKeyByteSink sink(key);
   1296     writeSortKey(s, length, sink, errorCode);
   1297     if(U_FAILURE(errorCode)) {
   1298         key.setToBogus();
   1299     } else if(key.isBogus()) {
   1300         errorCode = U_MEMORY_ALLOCATION_ERROR;
   1301     } else {
   1302         key.setLength(sink.NumberOfBytesAppended());
   1303     }
   1304     return key;
   1305 }
   1306 
   1307 int32_t
   1308 RuleBasedCollator::getSortKey(const UnicodeString &s,
   1309                               uint8_t *dest, int32_t capacity) const {
   1310     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
   1311 }
   1312 
   1313 int32_t
   1314 RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
   1315                               uint8_t *dest, int32_t capacity) const {
   1316     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
   1317         return 0;
   1318     }
   1319     uint8_t noDest[1] = { 0 };
   1320     if(dest == NULL) {
   1321         // Distinguish pure preflighting from an allocation error.
   1322         dest = noDest;
   1323         capacity = 0;
   1324     }
   1325     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
   1326     UErrorCode errorCode = U_ZERO_ERROR;
   1327     writeSortKey(s, length, sink, errorCode);
   1328     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
   1329 }
   1330 
   1331 void
   1332 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
   1333                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1334     if(U_FAILURE(errorCode)) { return; }
   1335     const UChar *limit = (length >= 0) ? s + length : NULL;
   1336     UBool numeric = settings->isNumeric();
   1337     CollationKeys::LevelCallback callback;
   1338     if(settings->dontCheckFCD()) {
   1339         UTF16CollationIterator iter(data, numeric, s, s, limit);
   1340         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1341                                                   sink, Collation::PRIMARY_LEVEL,
   1342                                                   callback, TRUE, errorCode);
   1343     } else {
   1344         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1345         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1346                                                   sink, Collation::PRIMARY_LEVEL,
   1347                                                   callback, TRUE, errorCode);
   1348     }
   1349     if(settings->getStrength() == UCOL_IDENTICAL) {
   1350         writeIdenticalLevel(s, limit, sink, errorCode);
   1351     }
   1352     static const char terminator = 0;  // TERMINATOR_BYTE
   1353     sink.Append(&terminator, 1);
   1354 }
   1355 
   1356 void
   1357 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
   1358                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1359     // NFD quick check
   1360     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
   1361     if(U_FAILURE(errorCode)) { return; }
   1362     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
   1363     UChar32 prev = 0;
   1364     if(nfdQCYesLimit != s) {
   1365         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
   1366     }
   1367     // Is there non-NFD text?
   1368     int32_t destLengthEstimate;
   1369     if(limit != NULL) {
   1370         if(nfdQCYesLimit == limit) { return; }
   1371         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
   1372     } else {
   1373         // s is NUL-terminated
   1374         if(*nfdQCYesLimit == 0) { return; }
   1375         destLengthEstimate = -1;
   1376     }
   1377     UnicodeString nfd;
   1378     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
   1379     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
   1380 }
   1381 
   1382 namespace {
   1383 
   1384 /**
   1385  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
   1386  * with an instance of this callback class.
   1387  * When another level is about to be written, the callback
   1388  * records the level and the number of bytes that will be written until
   1389  * the sink (which is actually a FixedSortKeyByteSink) fills up.
   1390  *
   1391  * When internalNextSortKeyPart() is called again, it restarts with the last level
   1392  * and ignores as many bytes as were written previously for that level.
   1393  */
   1394 class PartLevelCallback : public CollationKeys::LevelCallback {
   1395 public:
   1396     PartLevelCallback(const SortKeyByteSink &s)
   1397             : sink(s), level(Collation::PRIMARY_LEVEL) {
   1398         levelCapacity = sink.GetRemainingCapacity();
   1399     }
   1400     virtual ~PartLevelCallback() {}
   1401     virtual UBool needToWrite(Collation::Level l) {
   1402         if(!sink.Overflowed()) {
   1403             // Remember a level that will be at least partially written.
   1404             level = l;
   1405             levelCapacity = sink.GetRemainingCapacity();
   1406             return TRUE;
   1407         } else {
   1408             return FALSE;
   1409         }
   1410     }
   1411     Collation::Level getLevel() const { return level; }
   1412     int32_t getLevelCapacity() const { return levelCapacity; }
   1413 
   1414 private:
   1415     const SortKeyByteSink &sink;
   1416     Collation::Level level;
   1417     int32_t levelCapacity;
   1418 };
   1419 
   1420 }  // namespace
   1421 
   1422 int32_t
   1423 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
   1424                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
   1425     if(U_FAILURE(errorCode)) { return 0; }
   1426     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
   1427         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1428         return 0;
   1429     }
   1430     if(count == 0) { return 0; }
   1431 
   1432     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
   1433     sink.IgnoreBytes((int32_t)state[1]);
   1434     iter->move(iter, 0, UITER_START);
   1435 
   1436     Collation::Level level = (Collation::Level)state[0];
   1437     if(level <= Collation::QUATERNARY_LEVEL) {
   1438         UBool numeric = settings->isNumeric();
   1439         PartLevelCallback callback(sink);
   1440         if(settings->dontCheckFCD()) {
   1441             UIterCollationIterator ci(data, numeric, *iter);
   1442             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1443                                                       sink, level, callback, FALSE, errorCode);
   1444         } else {
   1445             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
   1446             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1447                                                       sink, level, callback, FALSE, errorCode);
   1448         }
   1449         if(U_FAILURE(errorCode)) { return 0; }
   1450         if(sink.NumberOfBytesAppended() > count) {
   1451             state[0] = (uint32_t)callback.getLevel();
   1452             state[1] = (uint32_t)callback.getLevelCapacity();
   1453             return count;
   1454         }
   1455         // All of the normal levels are done.
   1456         if(settings->getStrength() == UCOL_IDENTICAL) {
   1457             level = Collation::IDENTICAL_LEVEL;
   1458             iter->move(iter, 0, UITER_START);
   1459         }
   1460         // else fall through to setting ZERO_LEVEL
   1461     }
   1462 
   1463     if(level == Collation::IDENTICAL_LEVEL) {
   1464         int32_t levelCapacity = sink.GetRemainingCapacity();
   1465         UnicodeString s;
   1466         for(;;) {
   1467             UChar32 c = iter->next(iter);
   1468             if(c < 0) { break; }
   1469             s.append((UChar)c);
   1470         }
   1471         const UChar *sArray = s.getBuffer();
   1472         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
   1473         if(U_FAILURE(errorCode)) { return 0; }
   1474         if(sink.NumberOfBytesAppended() > count) {
   1475             state[0] = (uint32_t)level;
   1476             state[1] = (uint32_t)levelCapacity;
   1477             return count;
   1478         }
   1479     }
   1480 
   1481     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
   1482     state[0] = (uint32_t)Collation::ZERO_LEVEL;
   1483     state[1] = 0;
   1484     int32_t length = sink.NumberOfBytesAppended();
   1485     int32_t i = length;
   1486     while(i < count) { dest[i++] = 0; }
   1487     return length;
   1488 }
   1489 
   1490 void
   1491 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
   1492                                   UErrorCode &errorCode) const {
   1493     if(U_FAILURE(errorCode)) { return; }
   1494     const UChar *s = str.getBuffer();
   1495     const UChar *limit = s + str.length();
   1496     UBool numeric = settings->isNumeric();
   1497     if(settings->dontCheckFCD()) {
   1498         UTF16CollationIterator iter(data, numeric, s, s, limit);
   1499         int64_t ce;
   1500         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1501             ces.addElement(ce, errorCode);
   1502         }
   1503     } else {
   1504         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1505         int64_t ce;
   1506         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1507             ces.addElement(ce, errorCode);
   1508         }
   1509     }
   1510 }
   1511 
   1512 namespace {
   1513 
   1514 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
   1515                   UErrorCode &errorCode) {
   1516     if(U_FAILURE(errorCode) || length == 0) { return; }
   1517     if(!s.isEmpty()) {
   1518         s.append('_', errorCode);
   1519     }
   1520     s.append(letter, errorCode);
   1521     for(int32_t i = 0; i < length; ++i) {
   1522         s.append(uprv_toupper(subtag[i]), errorCode);
   1523     }
   1524 }
   1525 
   1526 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
   1527                      UErrorCode &errorCode) {
   1528     if(U_FAILURE(errorCode)) { return; }
   1529     if(!s.isEmpty()) {
   1530         s.append('_', errorCode);
   1531     }
   1532     static const char *valueChars = "1234...........IXO..SN..LU......";
   1533     s.append(letter, errorCode);
   1534     s.append(valueChars[value], errorCode);
   1535 }
   1536 
   1537 }  // namespace
   1538 
   1539 int32_t
   1540 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
   1541                                                     char *buffer, int32_t capacity,
   1542                                                     UErrorCode &errorCode) const {
   1543     if(U_FAILURE(errorCode)) { return 0; }
   1544     if(buffer == NULL ? capacity != 0 : capacity < 0) {
   1545         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1546         return 0;
   1547     }
   1548     if(locale == NULL) {
   1549         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
   1550     }
   1551 
   1552     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
   1553     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
   1554                                                   "collation", locale,
   1555                                                   NULL, &errorCode);
   1556     if(U_FAILURE(errorCode)) { return 0; }
   1557     if(length == 0) {
   1558         uprv_strcpy(resultLocale, "root");
   1559     } else {
   1560         resultLocale[length] = 0;
   1561     }
   1562 
   1563     // Append items in alphabetic order of their short definition letters.
   1564     CharString result;
   1565     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
   1566 
   1567     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
   1568         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
   1569     }
   1570     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
   1571     // See ICU tickets #10372 and #10386.
   1572     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
   1573         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
   1574     }
   1575     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
   1576         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
   1577     }
   1578     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
   1579         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
   1580     }
   1581     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
   1582         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
   1583     }
   1584     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
   1585     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1586     appendSubtag(result, 'K', subtag, length, errorCode);
   1587     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1588     appendSubtag(result, 'L', subtag, length, errorCode);
   1589     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
   1590         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
   1591     }
   1592     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1593     appendSubtag(result, 'R', subtag, length, errorCode);
   1594     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
   1595         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
   1596     }
   1597     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1598     appendSubtag(result, 'V', subtag, length, errorCode);
   1599     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
   1600     appendSubtag(result, 'Z', subtag, length, errorCode);
   1601 
   1602     if(U_FAILURE(errorCode)) { return 0; }
   1603     if(result.length() <= capacity) {
   1604         uprv_memcpy(buffer, result.data(), result.length());
   1605     }
   1606     return u_terminateChars(buffer, capacity, result.length(), &errorCode);
   1607 }
   1608 
   1609 UBool
   1610 RuleBasedCollator::isUnsafe(UChar32 c) const {
   1611     return data->isUnsafeBackward(c, settings->isNumeric());
   1612 }
   1613 
   1614 void U_CALLCONV
   1615 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
   1616     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
   1617 }
   1618 
   1619 UBool
   1620 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
   1621     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
   1622     return U_SUCCESS(errorCode);
   1623 }
   1624 
   1625 CollationElementIterator *
   1626 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
   1627     UErrorCode errorCode = U_ZERO_ERROR;
   1628     if(!initMaxExpansions(errorCode)) { return NULL; }
   1629     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1630     if(U_FAILURE(errorCode)) {
   1631         delete cei;
   1632         return NULL;
   1633     }
   1634     return cei;
   1635 }
   1636 
   1637 CollationElementIterator *
   1638 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
   1639     UErrorCode errorCode = U_ZERO_ERROR;
   1640     if(!initMaxExpansions(errorCode)) { return NULL; }
   1641     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1642     if(U_FAILURE(errorCode)) {
   1643         delete cei;
   1644         return NULL;
   1645     }
   1646     return cei;
   1647 }
   1648 
   1649 int32_t
   1650 RuleBasedCollator::getMaxExpansion(int32_t order) const {
   1651     UErrorCode errorCode = U_ZERO_ERROR;
   1652     (void)initMaxExpansions(errorCode);
   1653     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
   1654 }
   1655 
   1656 U_NAMESPACE_END
   1657 
   1658 #endif  // !UCONFIG_NO_COLLATION
   1659