Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2009-2014, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_COLLATION
     13 
     14 #include "unicode/alphaindex.h"
     15 #include "unicode/coll.h"
     16 #include "unicode/localpointer.h"
     17 #include "unicode/normalizer2.h"
     18 #include "unicode/tblcoll.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/ulocdata.h"
     21 #include "unicode/uniset.h"
     22 #include "unicode/uobject.h"
     23 #include "unicode/usetiter.h"
     24 #include "unicode/utf16.h"
     25 
     26 #include "cmemory.h"
     27 #include "cstring.h"
     28 #include "uassert.h"
     29 #include "uvector.h"
     30 #include "uvectr64.h"
     31 
     32 //#include <string>
     33 //#include <iostream>
     34 
     35 U_NAMESPACE_BEGIN
     36 
     37 namespace {
     38 
     39 /**
     40  * Prefix string for Chinese index buckets.
     41  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
     42  */
     43 const UChar BASE[1] = { 0xFDD0 };
     44 const int32_t BASE_LENGTH = 1;
     45 
     46 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
     47                                 const UnicodeString &one, const UnicodeString &other);
     48 
     49 }  // namespace
     50 
     51 static int32_t U_CALLCONV
     52 collatorComparator(const void *context, const void *left, const void *right);
     53 
     54 static int32_t U_CALLCONV
     55 recordCompareFn(const void *context, const void *left, const void *right);
     56 
     57 //  UVector<Record *> support function, delete a Record.
     58 static void U_CALLCONV
     59 alphaIndex_deleteRecord(void *obj) {
     60     delete static_cast<AlphabeticIndex::Record *>(obj);
     61 }
     62 
     63 namespace {
     64 
     65 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
     66                            UErrorCode &errorCode) {
     67     if (U_FAILURE(errorCode)) { return NULL; }
     68     if (owned.isValid()) {
     69         return owned.orphan();
     70     }
     71     UnicodeString *p = new UnicodeString(s);
     72     if (p == NULL) {
     73         errorCode = U_MEMORY_ALLOCATION_ERROR;
     74     }
     75     return p;
     76 }
     77 
     78 inline UnicodeString *getString(const UVector &list, int32_t i) {
     79     return static_cast<UnicodeString *>(list[i]);
     80 }
     81 
     82 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
     83     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
     84 }
     85 
     86 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
     87     return static_cast<AlphabeticIndex::Record *>(list[i]);
     88 }
     89 
     90 /**
     91  * Like Java Collections.binarySearch(List, String, Comparator).
     92  *
     93  * @return the index>=0 where the item was found,
     94  *         or the index<0 for inserting the string at ~index in sorted order
     95  */
     96 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
     97     if (list.size() == 0) { return ~0; }
     98     int32_t start = 0;
     99     int32_t limit = list.size();
    100     for (;;) {
    101         int32_t i = (start + limit) / 2;
    102         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
    103         UErrorCode errorCode = U_ZERO_ERROR;
    104         UCollationResult cmp = coll.compare(s, *si, errorCode);
    105         if (cmp == UCOL_EQUAL) {
    106             return i;
    107         } else if (cmp < 0) {
    108             if (i == start) {
    109                 return ~start;  // insert s before *si
    110             }
    111             limit = i;
    112         } else {
    113             if (i == start) {
    114                 return ~(start + 1);  // insert s after *si
    115             }
    116             start = i;
    117         }
    118     }
    119 }
    120 
    121 }  // namespace
    122 
    123 // The BucketList is not in the anonymous namespace because only Clang
    124 // seems to support its use in other classes from there.
    125 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
    126 class BucketList : public UObject {
    127 public:
    128     BucketList(UVector *bucketList, UVector *publicBucketList)
    129             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
    130         int32_t displayIndex = 0;
    131         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
    132             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
    133         }
    134     }
    135 
    136     // The virtual destructor must not be inline.
    137     // See ticket #8454 for details.
    138     virtual ~BucketList();
    139 
    140     int32_t getBucketCount() const {
    141         return immutableVisibleList_->size();
    142     }
    143 
    144     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
    145                            UErrorCode &errorCode) {
    146         // binary search
    147         int32_t start = 0;
    148         int32_t limit = bucketList_->size();
    149         while ((start + 1) < limit) {
    150             int32_t i = (start + limit) / 2;
    151             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
    152             UCollationResult nameVsBucket =
    153                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
    154             if (nameVsBucket < 0) {
    155                 limit = i;
    156             } else {
    157                 start = i;
    158             }
    159         }
    160         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
    161         if (bucket->displayBucket_ != NULL) {
    162             bucket = bucket->displayBucket_;
    163         }
    164         return bucket->displayIndex_;
    165     }
    166 
    167     /** All of the buckets, visible and invisible. */
    168     UVector *bucketList_;
    169     /** Just the visible buckets. */
    170     UVector *immutableVisibleList_;
    171 };
    172 
    173 BucketList::~BucketList() {
    174     delete bucketList_;
    175     if (immutableVisibleList_ != bucketList_) {
    176         delete immutableVisibleList_;
    177     }
    178 }
    179 
    180 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
    181     delete buckets_;
    182     delete collatorPrimaryOnly_;
    183 }
    184 
    185 int32_t
    186 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
    187     return buckets_->getBucketCount();
    188 }
    189 
    190 int32_t
    191 AlphabeticIndex::ImmutableIndex::getBucketIndex(
    192         const UnicodeString &name, UErrorCode &errorCode) const {
    193     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
    194 }
    195 
    196 const AlphabeticIndex::Bucket *
    197 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
    198     if (0 <= index && index < buckets_->getBucketCount()) {
    199         return icu::getBucket(*buckets_->immutableVisibleList_, index);
    200     } else {
    201         return NULL;
    202     }
    203 }
    204 
    205 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
    206         : inputList_(NULL),
    207           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
    208           maxLabelCount_(99),
    209           initialLabels_(NULL), firstCharsInScripts_(NULL),
    210           collator_(NULL), collatorPrimaryOnly_(NULL),
    211           buckets_(NULL) {
    212     init(&locale, status);
    213 }
    214 
    215 
    216 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
    217         : inputList_(NULL),
    218           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
    219           maxLabelCount_(99),
    220           initialLabels_(NULL), firstCharsInScripts_(NULL),
    221           collator_(collator), collatorPrimaryOnly_(NULL),
    222           buckets_(NULL) {
    223     init(NULL, status);
    224 }
    225 
    226 
    227 
    228 AlphabeticIndex::~AlphabeticIndex() {
    229     delete collator_;
    230     delete collatorPrimaryOnly_;
    231     delete firstCharsInScripts_;
    232     delete buckets_;
    233     delete inputList_;
    234     delete initialLabels_;
    235 }
    236 
    237 
    238 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
    239     if (U_FAILURE(status)) {
    240         return *this;
    241     }
    242     initialLabels_->addAll(additions);
    243     clearBuckets();
    244     return *this;
    245 }
    246 
    247 
    248 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
    249     addIndexExemplars(locale, status);
    250     clearBuckets();
    251     return *this;
    252 }
    253 
    254 
    255 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
    256     if (U_FAILURE(errorCode)) { return NULL; }
    257     // In C++, the ImmutableIndex must own its copy of the BucketList,
    258     // even if it contains no records, for proper memory management.
    259     // We could clone the buckets_ if they are not NULL,
    260     // but that would be worth it only if this method is called multiple times,
    261     // or called after using the old-style bucket iterator API.
    262     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
    263     LocalPointer<RuleBasedCollator> coll(
    264         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
    265     if (immutableBucketList.isNull() || coll.isNull()) {
    266         errorCode = U_MEMORY_ALLOCATION_ERROR;
    267         return NULL;
    268     }
    269     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
    270     if (immIndex == NULL) {
    271         errorCode = U_MEMORY_ALLOCATION_ERROR;
    272         return NULL;
    273     }
    274     // The ImmutableIndex adopted its parameter objects.
    275     immutableBucketList.orphan();
    276     coll.orphan();
    277     return immIndex;
    278 }
    279 
    280 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
    281     initBuckets(status);
    282     if (U_FAILURE(status)) {
    283         return 0;
    284     }
    285     return buckets_->getBucketCount();
    286 }
    287 
    288 
    289 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
    290     if (U_FAILURE(status) || inputList_ == NULL) {
    291         return 0;
    292     }
    293     return inputList_->size();
    294 }
    295 
    296 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
    297     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
    298     if (U_FAILURE(errorCode)) { return; }
    299 
    300     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
    301     const UnicodeString &overflowBoundary =
    302         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
    303 
    304     // We make a sorted array of elements.
    305     // Some of the input may be redundant.
    306     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
    307     // We filter out those cases.
    308     UnicodeSetIterator iter(*initialLabels_);
    309     while (iter.next()) {
    310         const UnicodeString *item = &iter.getString();
    311         LocalPointer<UnicodeString> ownedItem;
    312         UBool checkDistinct;
    313         int32_t itemLength = item->length();
    314         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
    315             checkDistinct = FALSE;
    316         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
    317                 item->charAt(itemLength - 2) != 0x2a) {
    318             // Use a label if it is marked with one trailing star,
    319             // even if the label string sorts the same when all contractions are suppressed.
    320             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
    321             item = ownedItem.getAlias();
    322             if (item == NULL) {
    323                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    324                 return;
    325             }
    326             checkDistinct = FALSE;
    327         } else {
    328             checkDistinct = TRUE;
    329         }
    330         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
    331             // Ignore a primary-ignorable or non-alphabetic index character.
    332         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
    333             // Ignore an index character that will land in the overflow bucket.
    334         } else if (checkDistinct &&
    335                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
    336             // Ignore a multi-code point index character that does not sort distinctly
    337             // from the sequence of its separate characters.
    338         } else {
    339             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
    340             if (insertionPoint < 0) {
    341                 indexCharacters.insertElementAt(
    342                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
    343             } else {
    344                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
    345                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
    346                     indexCharacters.setElementAt(
    347                         ownedString(*item, ownedItem, errorCode), insertionPoint);
    348                 }
    349             }
    350         }
    351     }
    352     if (U_FAILURE(errorCode)) { return; }
    353 
    354     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
    355 
    356     int32_t size = indexCharacters.size() - 1;
    357     if (size > maxLabelCount_) {
    358         int32_t count = 0;
    359         int32_t old = -1;
    360         for (int32_t i = 0; i < indexCharacters.size();) {
    361             ++count;
    362             int32_t bump = count * maxLabelCount_ / size;
    363             if (bump == old) {
    364                 indexCharacters.removeElementAt(i);
    365             } else {
    366                 old = bump;
    367                 ++i;
    368             }
    369         }
    370     }
    371 }
    372 
    373 namespace {
    374 
    375 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
    376     if (!current.startsWith(BASE, BASE_LENGTH)) {
    377         return current;
    378     }
    379     UChar rest = current.charAt(BASE_LENGTH);
    380     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
    381         int32_t count = rest-0x2800;
    382         temp.setTo((UChar)(0x30 + count % 10));
    383         if (count >= 10) {
    384             count /= 10;
    385             temp.insert(0, (UChar)(0x30 + count % 10));
    386             if (count >= 10) {
    387                 count /= 10;
    388                 temp.insert(0, (UChar)(0x30 + count));
    389             }
    390         }
    391         return temp.append((UChar)0x5283);
    392     }
    393     return temp.setTo(current, BASE_LENGTH);
    394 }
    395 
    396 UBool hasMultiplePrimaryWeights(
    397         const RuleBasedCollator &coll, uint32_t variableTop,
    398         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
    399     ces.removeAllElements();
    400     coll.internalGetCEs(s, ces, errorCode);
    401     if (U_FAILURE(errorCode)) { return FALSE; }
    402     UBool seenPrimary = FALSE;
    403     for (int32_t i = 0; i < ces.size(); ++i) {
    404         int64_t ce = ces.elementAti(i);
    405         uint32_t p = (uint32_t)(ce >> 32);
    406         if (p > variableTop) {
    407             // not primary ignorable
    408             if (seenPrimary) {
    409                 return TRUE;
    410             }
    411             seenPrimary = TRUE;
    412         }
    413     }
    414     return FALSE;
    415 }
    416 
    417 }  // namespace
    418 
    419 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
    420     // Initialize indexCharacters.
    421     UVector indexCharacters(errorCode);
    422     indexCharacters.setDeleter(uprv_deleteUObject);
    423     initLabels(indexCharacters, errorCode);
    424     if (U_FAILURE(errorCode)) { return NULL; }
    425 
    426     // Variables for hasMultiplePrimaryWeights().
    427     UVector64 ces(errorCode);
    428     uint32_t variableTop;
    429     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
    430         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
    431     } else {
    432         variableTop = 0;
    433     }
    434     UBool hasInvisibleBuckets = FALSE;
    435 
    436     // Helper arrays for Chinese Pinyin collation.
    437     Bucket *asciiBuckets[26] = {
    438         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    439         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    440     };
    441     Bucket *pinyinBuckets[26] = {
    442         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    443         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    444     };
    445     UBool hasPinyin = FALSE;
    446 
    447     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
    448     if (U_FAILURE(errorCode)) {
    449         return NULL;
    450     }
    451     bucketList->setDeleter(uprv_deleteUObject);
    452 
    453     // underflow bucket
    454     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
    455     if (bucket == NULL) {
    456         errorCode = U_MEMORY_ALLOCATION_ERROR;
    457         return NULL;
    458     }
    459     bucketList->addElement(bucket, errorCode);
    460     if (U_FAILURE(errorCode)) { return NULL; }
    461 
    462     UnicodeString temp;
    463 
    464     // fix up the list, adding underflow, additions, overflow
    465     // Insert inflow labels as needed.
    466     int32_t scriptIndex = -1;
    467     const UnicodeString *scriptUpperBoundary = &emptyString_;
    468     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
    469         UnicodeString &current = *getString(indexCharacters, i);
    470         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
    471             // We crossed the script boundary into a new script.
    472             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
    473             UBool skippedScript = FALSE;
    474             for (;;) {
    475                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
    476                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
    477                     break;
    478                 }
    479                 skippedScript = TRUE;
    480             }
    481             if (skippedScript && bucketList->size() > 1) {
    482                 // We are skipping one or more scripts,
    483                 // and we are not just getting out of the underflow label.
    484                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
    485                 if (bucket == NULL) {
    486                     errorCode = U_MEMORY_ALLOCATION_ERROR;
    487                     return NULL;
    488                 }
    489                 bucketList->addElement(bucket, errorCode);
    490             }
    491         }
    492         // Add a bucket with the current label.
    493         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
    494         if (bucket == NULL) {
    495             errorCode = U_MEMORY_ALLOCATION_ERROR;
    496             return NULL;
    497         }
    498         bucketList->addElement(bucket, errorCode);
    499         // Remember ASCII and Pinyin buckets for Pinyin redirects.
    500         UChar c;
    501         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
    502             asciiBuckets[c - 0x41] = bucket;
    503         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
    504                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
    505             pinyinBuckets[c - 0x41] = bucket;
    506             hasPinyin = TRUE;
    507         }
    508         // Check for multiple primary weights.
    509         if (!current.startsWith(BASE, BASE_LENGTH) &&
    510                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
    511                                           ces, errorCode) &&
    512                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
    513             // "AE-ligature" or "Sch" etc.
    514             for (int32_t i = bucketList->size() - 2;; --i) {
    515                 Bucket *singleBucket = getBucket(*bucketList, i);
    516                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
    517                     // There is no single-character bucket since the last
    518                     // underflow or inflow label.
    519                     break;
    520                 }
    521                 if (singleBucket->displayBucket_ == NULL &&
    522                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
    523                                                    singleBucket->lowerBoundary_,
    524                                                    ces, errorCode)) {
    525                     // Add an invisible bucket that redirects strings greater than the expansion
    526                     // to the previous single-character bucket.
    527                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
    528                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
    529                     bucket = new Bucket(emptyString_,
    530                         UnicodeString(current).append((UChar)0xFFFF),
    531                         U_ALPHAINDEX_NORMAL);
    532                     if (bucket == NULL) {
    533                         errorCode = U_MEMORY_ALLOCATION_ERROR;
    534                         return NULL;
    535                     }
    536                     bucket->displayBucket_ = singleBucket;
    537                     bucketList->addElement(bucket, errorCode);
    538                     hasInvisibleBuckets = TRUE;
    539                     break;
    540                 }
    541             }
    542         }
    543     }
    544     if (U_FAILURE(errorCode)) { return NULL; }
    545     if (bucketList->size() == 1) {
    546         // No real labels, show only the underflow label.
    547         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
    548         if (bl == NULL) {
    549             errorCode = U_MEMORY_ALLOCATION_ERROR;
    550             return NULL;
    551         }
    552         bucketList.orphan();
    553         return bl;
    554     }
    555     // overflow bucket
    556     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
    557     if (bucket == NULL) {
    558         errorCode = U_MEMORY_ALLOCATION_ERROR;
    559         return NULL;
    560     }
    561     bucketList->addElement(bucket, errorCode); // final
    562 
    563     if (hasPinyin) {
    564         // Redirect Pinyin buckets.
    565         Bucket *asciiBucket = NULL;
    566         for (int32_t i = 0; i < 26; ++i) {
    567             if (asciiBuckets[i] != NULL) {
    568                 asciiBucket = asciiBuckets[i];
    569             }
    570             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
    571                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
    572                 hasInvisibleBuckets = TRUE;
    573             }
    574         }
    575     }
    576 
    577     if (U_FAILURE(errorCode)) { return NULL; }
    578     if (!hasInvisibleBuckets) {
    579         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
    580         if (bl == NULL) {
    581             errorCode = U_MEMORY_ALLOCATION_ERROR;
    582             return NULL;
    583         }
    584         bucketList.orphan();
    585         return bl;
    586     }
    587     // Merge inflow buckets that are visually adjacent.
    588     // Iterate backwards: Merge inflow into overflow rather than the other way around.
    589     int32_t i = bucketList->size() - 1;
    590     Bucket *nextBucket = getBucket(*bucketList, i);
    591     while (--i > 0) {
    592         bucket = getBucket(*bucketList, i);
    593         if (bucket->displayBucket_ != NULL) {
    594             continue;  // skip invisible buckets
    595         }
    596         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
    597             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
    598                 bucket->displayBucket_ = nextBucket;
    599                 continue;
    600             }
    601         }
    602         nextBucket = bucket;
    603     }
    604 
    605     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
    606     if (U_FAILURE(errorCode)) {
    607         return NULL;
    608     }
    609     // Do not call publicBucketList->setDeleter():
    610     // This vector shares its objects with the bucketList.
    611     for (int32_t i = 0; i < bucketList->size(); ++i) {
    612         bucket = getBucket(*bucketList, i);
    613         if (bucket->displayBucket_ == NULL) {
    614             publicBucketList->addElement(bucket, errorCode);
    615         }
    616     }
    617     if (U_FAILURE(errorCode)) { return NULL; }
    618     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
    619     if (bl == NULL) {
    620         errorCode = U_MEMORY_ALLOCATION_ERROR;
    621         return NULL;
    622     }
    623     bucketList.orphan();
    624     publicBucketList.orphan();
    625     return bl;
    626 }
    627 
    628 /**
    629  * Creates an index, and buckets and sorts the list of records into the index.
    630  */
    631 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
    632     if (U_FAILURE(errorCode) || buckets_ != NULL) {
    633         return;
    634     }
    635     buckets_ = createBucketList(errorCode);
    636     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
    637         return;
    638     }
    639 
    640     // Sort the records by name.
    641     // Stable sort preserves input order of collation duplicates.
    642     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
    643 
    644     // Now, we traverse all of the input, which is now sorted.
    645     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
    646     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
    647     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
    648     // we need to improve it for that case.
    649 
    650     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
    651     int32_t bucketIndex = 1;
    652     Bucket *nextBucket;
    653     const UnicodeString *upperBoundary;
    654     if (bucketIndex < buckets_->bucketList_->size()) {
    655         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
    656         upperBoundary = &nextBucket->lowerBoundary_;
    657     } else {
    658         nextBucket = NULL;
    659         upperBoundary = NULL;
    660     }
    661     for (int32_t i = 0; i < inputList_->size(); ++i) {
    662         Record *r = getRecord(*inputList_, i);
    663         // if the current bucket isn't the right one, find the one that is
    664         // We have a special flag for the last bucket so that we don't look any further
    665         while (upperBoundary != NULL &&
    666                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
    667             currentBucket = nextBucket;
    668             // now reset the boundary that we compare against
    669             if (bucketIndex < buckets_->bucketList_->size()) {
    670                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
    671                 upperBoundary = &nextBucket->lowerBoundary_;
    672             } else {
    673                 upperBoundary = NULL;
    674             }
    675         }
    676         // now put the record into the bucket.
    677         Bucket *bucket = currentBucket;
    678         if (bucket->displayBucket_ != NULL) {
    679             bucket = bucket->displayBucket_;
    680         }
    681         if (bucket->records_ == NULL) {
    682             bucket->records_ = new UVector(errorCode);
    683             if (bucket->records_ == NULL) {
    684                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    685                 return;
    686             }
    687         }
    688         bucket->records_->addElement(r, errorCode);
    689     }
    690 }
    691 
    692 void AlphabeticIndex::clearBuckets() {
    693     if (buckets_ != NULL) {
    694         delete buckets_;
    695         buckets_ = NULL;
    696         internalResetBucketIterator();
    697     }
    698 }
    699 
    700 void AlphabeticIndex::internalResetBucketIterator() {
    701     labelsIterIndex_ = -1;
    702     currentBucket_ = NULL;
    703 }
    704 
    705 
    706 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
    707     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
    708     if (U_FAILURE(status)) {
    709         return;
    710     }
    711 
    712     UnicodeSet exemplars;
    713     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
    714     if (U_SUCCESS(status)) {
    715         initialLabels_->addAll(exemplars);
    716         return;
    717     }
    718     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
    719 
    720     // The locale data did not include explicit Index characters.
    721     // Synthesize a set of them from the locale's standard exemplar characters.
    722     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
    723     if (U_FAILURE(status)) {
    724         return;
    725     }
    726 
    727     // question: should we add auxiliary exemplars?
    728     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
    729         exemplars.add(0x61, 0x7A);
    730     }
    731     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
    732         // cut down to small list
    733         exemplars.remove(0xAC00, 0xD7A3).
    734             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
    735             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
    736             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
    737             add(0xD30C).add(0xD558);
    738     }
    739     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
    740         // cut down to small list
    741         // make use of the fact that Ethiopic is allocated in 8's, where
    742         // the base is 0 mod 8.
    743         UnicodeSet ethiopic(
    744             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
    745         UnicodeSetIterator it(ethiopic);
    746         while (it.next() && !it.isString()) {
    747             if ((it.getCodepoint() & 0x7) != 0) {
    748                 exemplars.remove(it.getCodepoint());
    749             }
    750         }
    751     }
    752 
    753     // Upper-case any that aren't already so.
    754     //   (We only do this for synthesized index characters.)
    755     UnicodeSetIterator it(exemplars);
    756     UnicodeString upperC;
    757     while (it.next()) {
    758         const UnicodeString &exemplarC = it.getString();
    759         upperC = exemplarC;
    760         upperC.toUpper(locale);
    761         initialLabels_->add(upperC);
    762     }
    763 }
    764 
    765 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
    766     UnicodeSet contractions;
    767     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
    768     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
    769     initialLabels_->addAll(contractions);
    770     UnicodeSetIterator iter(contractions);
    771     while (iter.next()) {
    772         const UnicodeString &s = iter.getString();
    773         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
    774         UChar c = s.charAt(s.length() - 1);
    775         if (0x41 <= c && c <= 0x5A) {  // A-Z
    776             // There are Pinyin labels, add ASCII A-Z labels as well.
    777             initialLabels_->add(0x41, 0x5A);  // A-Z
    778             break;
    779         }
    780     }
    781     return TRUE;
    782 }
    783 
    784 
    785 /*
    786  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
    787  */
    788 static const UChar CGJ = 0x034F;
    789 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
    790     UnicodeString result;
    791     if (item.length() == 0) {
    792         return result;
    793     }
    794     int32_t i = 0;
    795     for (;;) {
    796         UChar32  cp = item.char32At(i);
    797         result.append(cp);
    798         i = item.moveIndex32(i, 1);
    799         if (i >= item.length()) {
    800             break;
    801         }
    802         result.append(CGJ);
    803     }
    804     return result;
    805 }
    806 
    807 
    808 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
    809     return FALSE;
    810 }
    811 
    812 
    813 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
    814     return FALSE;
    815 }
    816 
    817 
    818 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
    819     return *collator_;
    820 }
    821 
    822 
    823 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
    824     return inflowLabel_;
    825 }
    826 
    827 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
    828     return overflowLabel_;
    829 }
    830 
    831 
    832 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
    833     return underflowLabel_;
    834 }
    835 
    836 
    837 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    838     inflowLabel_ = label;
    839     clearBuckets();
    840     return *this;
    841 }
    842 
    843 
    844 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    845     overflowLabel_ = label;
    846     clearBuckets();
    847     return *this;
    848 }
    849 
    850 
    851 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    852     underflowLabel_ = label;
    853     clearBuckets();
    854     return *this;
    855 }
    856 
    857 
    858 int32_t AlphabeticIndex::getMaxLabelCount() const {
    859     return maxLabelCount_;
    860 }
    861 
    862 
    863 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
    864     if (U_FAILURE(status)) {
    865         return *this;
    866     }
    867     if (maxLabelCount <= 0) {
    868         status = U_ILLEGAL_ARGUMENT_ERROR;
    869         return *this;
    870     }
    871     maxLabelCount_ = maxLabelCount;
    872     clearBuckets();
    873     return *this;
    874 }
    875 
    876 
    877 //
    878 //  init() - Common code for constructors.
    879 //
    880 
    881 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
    882     if (U_FAILURE(status)) { return; }
    883     if (locale == NULL && collator_ == NULL) {
    884         status = U_ILLEGAL_ARGUMENT_ERROR;
    885         return;
    886     }
    887 
    888     initialLabels_         = new UnicodeSet();
    889     if (initialLabels_ == NULL) {
    890         status = U_MEMORY_ALLOCATION_ERROR;
    891         return;
    892     }
    893 
    894     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
    895     overflowLabel_ = inflowLabel_;
    896     underflowLabel_ = inflowLabel_;
    897 
    898     if (collator_ == NULL) {
    899         Collator *coll = Collator::createInstance(*locale, status);
    900         if (U_FAILURE(status)) {
    901             delete coll;
    902             return;
    903         }
    904         if (coll == NULL) {
    905             status = U_MEMORY_ALLOCATION_ERROR;
    906             return;
    907         }
    908         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
    909         if (collator_ == NULL) {
    910             delete coll;
    911             status = U_UNSUPPORTED_ERROR;
    912             return;
    913         }
    914     }
    915     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
    916     if (collatorPrimaryOnly_ == NULL) {
    917         status = U_MEMORY_ALLOCATION_ERROR;
    918         return;
    919     }
    920     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
    921     firstCharsInScripts_ = firstStringsInScript(status);
    922     if (U_FAILURE(status)) { return; }
    923     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
    924     // Guard against a degenerate collator where
    925     // some script boundary strings are primary ignorable.
    926     for (;;) {
    927         if (U_FAILURE(status)) { return; }
    928         if (firstCharsInScripts_->isEmpty()) {
    929             // AlphabeticIndex requires some non-ignorable script boundary strings.
    930             status = U_ILLEGAL_ARGUMENT_ERROR;
    931             return;
    932         }
    933         if (collatorPrimaryOnly_->compare(
    934                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
    935                 emptyString_, status) == UCOL_EQUAL) {
    936             firstCharsInScripts_->removeElementAt(0);
    937         } else {
    938             break;
    939         }
    940     }
    941 
    942     // Chinese index characters, which are specific to each of the several Chinese tailorings,
    943     // take precedence over the single locale data exemplar set per language.
    944     if (!addChineseIndexCharacters(status) && locale != NULL) {
    945         addIndexExemplars(*locale, status);
    946     }
    947 }
    948 
    949 
    950 //
    951 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
    952 //
    953 static int32_t U_CALLCONV
    954 collatorComparator(const void *context, const void *left, const void *right) {
    955     const UElement *leftElement = static_cast<const UElement *>(left);
    956     const UElement *rightElement = static_cast<const UElement *>(right);
    957     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
    958     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
    959 
    960     if (leftString == rightString) {
    961         // Catches case where both are NULL
    962         return 0;
    963     }
    964     if (leftString == NULL) {
    965         return 1;
    966     };
    967     if (rightString == NULL) {
    968         return -1;
    969     }
    970     const Collator *col = static_cast<const Collator *>(context);
    971     UErrorCode errorCode = U_ZERO_ERROR;
    972     return col->compare(*leftString, *rightString, errorCode);
    973 }
    974 
    975 //
    976 //  Comparison function for UVector<Record *> sorting with a collator.
    977 //
    978 static int32_t U_CALLCONV
    979 recordCompareFn(const void *context, const void *left, const void *right) {
    980     const UElement *leftElement = static_cast<const UElement *>(left);
    981     const UElement *rightElement = static_cast<const UElement *>(right);
    982     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
    983     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
    984     const Collator *col = static_cast<const Collator *>(context);
    985     UErrorCode errorCode = U_ZERO_ERROR;
    986     return col->compare(leftRec->name_, rightRec->name_, errorCode);
    987 }
    988 
    989 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    990     if (U_FAILURE(status)) {
    991         return NULL;
    992     }
    993     LocalPointer<UVector> dest(new UVector(status), status);
    994     if (U_FAILURE(status)) {
    995         return NULL;
    996     }
    997     dest->setDeleter(uprv_deleteUObject);
    998     // Fetch the script-first-primary contractions which are defined in the root collator.
    999     // They all start with U+FDD1.
   1000     UnicodeSet set;
   1001     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
   1002     if (U_FAILURE(status)) {
   1003         return NULL;
   1004     }
   1005     if (set.isEmpty()) {
   1006         status = U_UNSUPPORTED_ERROR;
   1007         return NULL;
   1008     }
   1009     UnicodeSetIterator iter(set);
   1010     while (iter.next()) {
   1011         const UnicodeString &boundary = iter.getString();
   1012         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
   1013         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
   1014             // Ignore boundaries for the special reordering groups.
   1015             // Take only those for "real scripts" (where the sample character is a Letter,
   1016             // and the one for unassigned implicit weights (Cn).
   1017             continue;
   1018         }
   1019         UnicodeString *s = new UnicodeString(boundary);
   1020         if (s == NULL) {
   1021             status = U_MEMORY_ALLOCATION_ERROR;
   1022             return NULL;
   1023         }
   1024         dest->addElement(s, status);
   1025     }
   1026     return dest.orphan();
   1027 }
   1028 
   1029 
   1030 namespace {
   1031 
   1032 /**
   1033  * Returns true if one index character string is "better" than the other.
   1034  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
   1035  * better, and otherwise binary-less-than is better.
   1036  */
   1037 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
   1038                                 const UnicodeString &one, const UnicodeString &other) {
   1039     // This is called with primary-equal strings, but never with one.equals(other).
   1040     UErrorCode status = U_ZERO_ERROR;
   1041     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
   1042     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
   1043     if (U_FAILURE(status)) { return FALSE; }
   1044     int32_t result = n1.countChar32() - n2.countChar32();
   1045     if (result != 0) {
   1046         return result < 0;
   1047     }
   1048     result = n1.compareCodePointOrder(n2);
   1049     if (result != 0) {
   1050         return result < 0;
   1051     }
   1052     return one.compareCodePointOrder(other) < 0;
   1053 }
   1054 
   1055 }  // namespace
   1056 
   1057 //
   1058 //  Constructor & Destructor for AlphabeticIndex::Record
   1059 //
   1060 //     Records are internal only, instances are not directly surfaced in the public API.
   1061 //     This class is mostly struct-like, with all public fields.
   1062 
   1063 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
   1064         : name_(name), data_(data) {}
   1065 
   1066 AlphabeticIndex::Record::~Record() {
   1067 }
   1068 
   1069 
   1070 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
   1071     if (U_FAILURE(status)) {
   1072         return *this;
   1073     }
   1074     if (inputList_ == NULL) {
   1075         inputList_ = new UVector(status);
   1076         if (inputList_ == NULL) {
   1077             status = U_MEMORY_ALLOCATION_ERROR;
   1078             return *this;
   1079         }
   1080         inputList_->setDeleter(alphaIndex_deleteRecord);
   1081     }
   1082     Record *r = new Record(name, data);
   1083     if (r == NULL) {
   1084         status = U_MEMORY_ALLOCATION_ERROR;
   1085         return *this;
   1086     }
   1087     inputList_->addElement(r, status);
   1088     clearBuckets();
   1089     //std::string ss;
   1090     //std::string ss2;
   1091     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
   1092     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
   1093     return *this;
   1094 }
   1095 
   1096 
   1097 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
   1098     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
   1099         inputList_->removeAllElements();
   1100         clearBuckets();
   1101     }
   1102     return *this;
   1103 }
   1104 
   1105 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
   1106     initBuckets(status);
   1107     if (U_FAILURE(status)) {
   1108         return 0;
   1109     }
   1110     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
   1111 }
   1112 
   1113 
   1114 int32_t AlphabeticIndex::getBucketIndex() const {
   1115     return labelsIterIndex_;
   1116 }
   1117 
   1118 
   1119 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
   1120     if (U_FAILURE(status)) {
   1121         return FALSE;
   1122     }
   1123     if (buckets_ == NULL && currentBucket_ != NULL) {
   1124         status = U_ENUM_OUT_OF_SYNC_ERROR;
   1125         return FALSE;
   1126     }
   1127     initBuckets(status);
   1128     if (U_FAILURE(status)) {
   1129         return FALSE;
   1130     }
   1131     ++labelsIterIndex_;
   1132     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
   1133         labelsIterIndex_ = buckets_->getBucketCount();
   1134         return FALSE;
   1135     }
   1136     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
   1137     resetRecordIterator();
   1138     return TRUE;
   1139 }
   1140 
   1141 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
   1142     if (currentBucket_ != NULL) {
   1143         return currentBucket_->label_;
   1144     } else {
   1145         return emptyString_;
   1146     }
   1147 }
   1148 
   1149 
   1150 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
   1151     if (currentBucket_ != NULL) {
   1152         return currentBucket_->labelType_;
   1153     } else {
   1154         return U_ALPHAINDEX_NORMAL;
   1155     }
   1156 }
   1157 
   1158 
   1159 int32_t AlphabeticIndex::getBucketRecordCount() const {
   1160     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
   1161         return currentBucket_->records_->size();
   1162     } else {
   1163         return 0;
   1164     }
   1165 }
   1166 
   1167 
   1168 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
   1169     if (U_FAILURE(status)) {
   1170         return *this;
   1171     }
   1172     internalResetBucketIterator();
   1173     return *this;
   1174 }
   1175 
   1176 
   1177 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
   1178     if (U_FAILURE(status)) {
   1179         return FALSE;
   1180     }
   1181     if (currentBucket_ == NULL) {
   1182         // We are trying to iterate over the items in a bucket, but there is no
   1183         // current bucket from the enumeration of buckets.
   1184         status = U_INVALID_STATE_ERROR;
   1185         return FALSE;
   1186     }
   1187     if (buckets_ == NULL) {
   1188         status = U_ENUM_OUT_OF_SYNC_ERROR;
   1189         return FALSE;
   1190     }
   1191     if (currentBucket_->records_ == NULL) {
   1192         return FALSE;
   1193     }
   1194     ++itemsIterIndex_;
   1195     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
   1196         itemsIterIndex_  = currentBucket_->records_->size();
   1197         return FALSE;
   1198     }
   1199     return TRUE;
   1200 }
   1201 
   1202 
   1203 const UnicodeString &AlphabeticIndex::getRecordName() const {
   1204     const UnicodeString *retStr = &emptyString_;
   1205     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
   1206         itemsIterIndex_ >= 0 &&
   1207         itemsIterIndex_ < currentBucket_->records_->size()) {
   1208             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
   1209             retStr = &item->name_;
   1210     }
   1211     return *retStr;
   1212 }
   1213 
   1214 const void *AlphabeticIndex::getRecordData() const {
   1215     const void *retPtr = NULL;
   1216     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
   1217         itemsIterIndex_ >= 0 &&
   1218         itemsIterIndex_ < currentBucket_->records_->size()) {
   1219             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
   1220             retPtr = item->data_;
   1221     }
   1222     return retPtr;
   1223 }
   1224 
   1225 
   1226 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
   1227     itemsIterIndex_ = -1;
   1228     return *this;
   1229 }
   1230 
   1231 
   1232 
   1233 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
   1234                                 const UnicodeString &lowerBoundary,
   1235                                 UAlphabeticIndexLabelType type)
   1236         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
   1237           displayBucket_(NULL), displayIndex_(-1),
   1238           records_(NULL) {
   1239 }
   1240 
   1241 
   1242 AlphabeticIndex::Bucket::~Bucket() {
   1243     delete records_;
   1244 }
   1245 
   1246 U_NAMESPACE_END
   1247 
   1248 #endif  // !UCONFIG_NO_COLLATION
   1249