Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2009-2014, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #if !UCONFIG_NO_COLLATION
     11 
     12 #include "unicode/alphaindex.h"
     13 #include "unicode/coll.h"
     14 #include "unicode/localpointer.h"
     15 #include "unicode/normalizer2.h"
     16 #include "unicode/tblcoll.h"
     17 #include "unicode/uchar.h"
     18 #include "unicode/ulocdata.h"
     19 #include "unicode/uniset.h"
     20 #include "unicode/uobject.h"
     21 #include "unicode/usetiter.h"
     22 #include "unicode/utf16.h"
     23 
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "uassert.h"
     27 #include "uvector.h"
     28 #include "uvectr64.h"
     29 
     30 //#include <string>
     31 //#include <iostream>
     32 
     33 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     34 
     35 U_NAMESPACE_BEGIN
     36 
     37 namespace {
     38 
     39 /**
     40  * Prefix string for Chinese index buckets.
     41  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
     42  */
     43 const UChar BASE[1] = { 0xFDD0 };
     44 const int32_t BASE_LENGTH = 1;
     45 
     46 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
     47                                 const UnicodeString &one, const UnicodeString &other);
     48 
     49 }  // namespace
     50 
     51 static int32_t U_CALLCONV
     52 collatorComparator(const void *context, const void *left, const void *right);
     53 
     54 static int32_t U_CALLCONV
     55 recordCompareFn(const void *context, const void *left, const void *right);
     56 
     57 //  UVector<Record *> support function, delete a Record.
     58 static void U_CALLCONV
     59 alphaIndex_deleteRecord(void *obj) {
     60     delete static_cast<AlphabeticIndex::Record *>(obj);
     61 }
     62 
     63 namespace {
     64 
     65 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
     66                            UErrorCode &errorCode) {
     67     if (U_FAILURE(errorCode)) { return NULL; }
     68     if (owned.isValid()) {
     69         return owned.orphan();
     70     }
     71     UnicodeString *p = new UnicodeString(s);
     72     if (p == NULL) {
     73         errorCode = U_MEMORY_ALLOCATION_ERROR;
     74     }
     75     return p;
     76 }
     77 
     78 inline UnicodeString *getString(const UVector &list, int32_t i) {
     79     return static_cast<UnicodeString *>(list[i]);
     80 }
     81 
     82 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
     83     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
     84 }
     85 
     86 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
     87     return static_cast<AlphabeticIndex::Record *>(list[i]);
     88 }
     89 
     90 /**
     91  * Like Java Collections.binarySearch(List, String, Comparator).
     92  *
     93  * @return the index>=0 where the item was found,
     94  *         or the index<0 for inserting the string at ~index in sorted order
     95  */
     96 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
     97     if (list.size() == 0) { return ~0; }
     98     int32_t start = 0;
     99     int32_t limit = list.size();
    100     for (;;) {
    101         int32_t i = (start + limit) / 2;
    102         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
    103         UErrorCode errorCode = U_ZERO_ERROR;
    104         UCollationResult cmp = coll.compare(s, *si, errorCode);
    105         if (cmp == UCOL_EQUAL) {
    106             return i;
    107         } else if (cmp < 0) {
    108             if (i == start) {
    109                 return ~start;  // insert s before *si
    110             }
    111             limit = i;
    112         } else {
    113             if (i == start) {
    114                 return ~(start + 1);  // insert s after *si
    115             }
    116             start = i;
    117         }
    118     }
    119 }
    120 
    121 }  // namespace
    122 
    123 // The BucketList is not in the anonymous namespace because only Clang
    124 // seems to support its use in other classes from there.
    125 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
    126 class BucketList : public UObject {
    127 public:
    128     BucketList(UVector *bucketList, UVector *publicBucketList)
    129             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
    130         int32_t displayIndex = 0;
    131         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
    132             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
    133         }
    134     }
    135 
    136     // The virtual destructor must not be inline.
    137     // See ticket #8454 for details.
    138     virtual ~BucketList();
    139 
    140     int32_t getBucketCount() const {
    141         return immutableVisibleList_->size();
    142     }
    143 
    144     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
    145                            UErrorCode &errorCode) {
    146         // binary search
    147         int32_t start = 0;
    148         int32_t limit = bucketList_->size();
    149         while ((start + 1) < limit) {
    150             int32_t i = (start + limit) / 2;
    151             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
    152             UCollationResult nameVsBucket =
    153                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
    154             if (nameVsBucket < 0) {
    155                 limit = i;
    156             } else {
    157                 start = i;
    158             }
    159         }
    160         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
    161         if (bucket->displayBucket_ != NULL) {
    162             bucket = bucket->displayBucket_;
    163         }
    164         return bucket->displayIndex_;
    165     }
    166 
    167     /** All of the buckets, visible and invisible. */
    168     UVector *bucketList_;
    169     /** Just the visible buckets. */
    170     UVector *immutableVisibleList_;
    171 };
    172 
    173 BucketList::~BucketList() {
    174     delete bucketList_;
    175     if (immutableVisibleList_ != bucketList_) {
    176         delete immutableVisibleList_;
    177     }
    178 }
    179 
    180 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
    181     delete buckets_;
    182     delete collatorPrimaryOnly_;
    183 }
    184 
    185 int32_t
    186 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
    187     return buckets_->getBucketCount();
    188 }
    189 
    190 int32_t
    191 AlphabeticIndex::ImmutableIndex::getBucketIndex(
    192         const UnicodeString &name, UErrorCode &errorCode) const {
    193     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
    194 }
    195 
    196 const AlphabeticIndex::Bucket *
    197 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
    198     if (0 <= index && index < buckets_->getBucketCount()) {
    199         return icu::getBucket(*buckets_->immutableVisibleList_, index);
    200     } else {
    201         return NULL;
    202     }
    203 }
    204 
    205 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
    206         : inputList_(NULL),
    207           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
    208           maxLabelCount_(99),
    209           initialLabels_(NULL), firstCharsInScripts_(NULL),
    210           collator_(NULL), collatorPrimaryOnly_(NULL),
    211           buckets_(NULL) {
    212     init(&locale, status);
    213 }
    214 
    215 
    216 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
    217         : inputList_(NULL),
    218           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
    219           maxLabelCount_(99),
    220           initialLabels_(NULL), firstCharsInScripts_(NULL),
    221           collator_(collator), collatorPrimaryOnly_(NULL),
    222           buckets_(NULL) {
    223     init(NULL, status);
    224 }
    225 
    226 
    227 
    228 AlphabeticIndex::~AlphabeticIndex() {
    229     delete collator_;
    230     delete collatorPrimaryOnly_;
    231     delete firstCharsInScripts_;
    232     delete buckets_;
    233     delete inputList_;
    234     delete initialLabels_;
    235 }
    236 
    237 
    238 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
    239     if (U_FAILURE(status)) {
    240         return *this;
    241     }
    242     initialLabels_->addAll(additions);
    243     clearBuckets();
    244     return *this;
    245 }
    246 
    247 
    248 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
    249     addIndexExemplars(locale, status);
    250     clearBuckets();
    251     return *this;
    252 }
    253 
    254 
    255 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
    256     if (U_FAILURE(errorCode)) { return NULL; }
    257     // In C++, the ImmutableIndex must own its copy of the BucketList,
    258     // even if it contains no records, for proper memory management.
    259     // We could clone the buckets_ if they are not NULL,
    260     // but that would be worth it only if this method is called multiple times,
    261     // or called after using the old-style bucket iterator API.
    262     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
    263     LocalPointer<RuleBasedCollator> coll(
    264         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
    265     if (immutableBucketList.isNull() || coll.isNull()) {
    266         errorCode = U_MEMORY_ALLOCATION_ERROR;
    267         return NULL;
    268     }
    269     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
    270     if (immIndex == NULL) {
    271         errorCode = U_MEMORY_ALLOCATION_ERROR;
    272         return NULL;
    273     }
    274     // The ImmutableIndex adopted its parameter objects.
    275     immutableBucketList.orphan();
    276     coll.orphan();
    277     return immIndex;
    278 }
    279 
    280 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
    281     initBuckets(status);
    282     if (U_FAILURE(status)) {
    283         return 0;
    284     }
    285     return buckets_->getBucketCount();
    286 }
    287 
    288 
    289 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
    290     if (U_FAILURE(status) || inputList_ == NULL) {
    291         return 0;
    292     }
    293     return inputList_->size();
    294 }
    295 
    296 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
    297     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
    298     if (U_FAILURE(errorCode)) { return; }
    299 
    300     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
    301     const UnicodeString &overflowBoundary =
    302         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
    303 
    304     // We make a sorted array of elements.
    305     // Some of the input may be redundant.
    306     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
    307     // We filter out those cases.
    308     UnicodeSetIterator iter(*initialLabels_);
    309     while (iter.next()) {
    310         const UnicodeString *item = &iter.getString();
    311         LocalPointer<UnicodeString> ownedItem;
    312         UBool checkDistinct;
    313         int32_t itemLength = item->length();
    314         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
    315             checkDistinct = FALSE;
    316         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
    317                 item->charAt(itemLength - 2) != 0x2a) {
    318             // Use a label if it is marked with one trailing star,
    319             // even if the label string sorts the same when all contractions are suppressed.
    320             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
    321             item = ownedItem.getAlias();
    322             if (item == NULL) {
    323                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    324                 return;
    325             }
    326             checkDistinct = FALSE;
    327         } else {
    328             checkDistinct = TRUE;
    329         }
    330         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
    331             // Ignore a primary-ignorable or non-alphabetic index character.
    332         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
    333             // Ignore an index character that will land in the overflow bucket.
    334         } else if (checkDistinct &&
    335                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
    336             // Ignore a multi-code point index character that does not sort distinctly
    337             // from the sequence of its separate characters.
    338         } else {
    339             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
    340             if (insertionPoint < 0) {
    341                 indexCharacters.insertElementAt(
    342                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
    343             } else {
    344                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
    345                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
    346                     indexCharacters.setElementAt(
    347                         ownedString(*item, ownedItem, errorCode), insertionPoint);
    348                 }
    349             }
    350         }
    351     }
    352     if (U_FAILURE(errorCode)) { return; }
    353 
    354     // if the result is still too large, cut down to maxCount elements, by removing every nth element
    355 
    356     int32_t size = indexCharacters.size() - 1;
    357     if (size > maxLabelCount_) {
    358         int32_t count = 0;
    359         int32_t old = -1;
    360         for (int32_t i = 0; i < indexCharacters.size();) {
    361             ++count;
    362             int32_t bump = count * maxLabelCount_ / size;
    363             if (bump == old) {
    364                 indexCharacters.removeElementAt(i);
    365             } else {
    366                 old = bump;
    367                 ++i;
    368             }
    369         }
    370     }
    371 }
    372 
    373 namespace {
    374 
    375 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
    376     if (!current.startsWith(BASE, BASE_LENGTH)) {
    377         return current;
    378     }
    379     UChar rest = current.charAt(BASE_LENGTH);
    380     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
    381         int32_t count = rest-0x2800;
    382         temp.setTo((UChar)(0x30 + count % 10));
    383         if (count >= 10) {
    384             count /= 10;
    385             temp.insert(0, (UChar)(0x30 + count % 10));
    386             if (count >= 10) {
    387                 count /= 10;
    388                 temp.insert(0, (UChar)(0x30 + count));
    389             }
    390         }
    391         return temp.append((UChar)0x5283);
    392     }
    393     return temp.setTo(current, BASE_LENGTH);
    394 }
    395 
    396 UBool hasMultiplePrimaryWeights(
    397         const RuleBasedCollator &coll, uint32_t variableTop,
    398         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
    399     ces.removeAllElements();
    400     coll.internalGetCEs(s, ces, errorCode);
    401     if (U_FAILURE(errorCode)) { return FALSE; }
    402     UBool seenPrimary = FALSE;
    403     for (int32_t i = 0; i < ces.size(); ++i) {
    404         int64_t ce = ces.elementAti(i);
    405         uint32_t p = (uint32_t)(ce >> 32);
    406         if (p > variableTop) {
    407             // not primary ignorable
    408             if (seenPrimary) {
    409                 return TRUE;
    410             }
    411             seenPrimary = TRUE;
    412         }
    413     }
    414     return FALSE;
    415 }
    416 
    417 }  // namespace
    418 
    419 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
    420     // Initialize indexCharacters.
    421     UVector indexCharacters(errorCode);
    422     indexCharacters.setDeleter(uprv_deleteUObject);
    423     initLabels(indexCharacters, errorCode);
    424     if (U_FAILURE(errorCode)) { return NULL; }
    425 
    426     // Variables for hasMultiplePrimaryWeights().
    427     UVector64 ces(errorCode);
    428     uint32_t variableTop;
    429     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
    430         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
    431     } else {
    432         variableTop = 0;
    433     }
    434     UBool hasInvisibleBuckets = FALSE;
    435 
    436     // Helper arrays for Chinese Pinyin collation.
    437     Bucket *asciiBuckets[26] = {
    438         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    439         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    440     };
    441     Bucket *pinyinBuckets[26] = {
    442         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    443         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    444     };
    445     UBool hasPinyin = FALSE;
    446 
    447     LocalPointer<UVector> bucketList(new UVector(errorCode));
    448     if (bucketList.isNull()) {
    449         errorCode = U_MEMORY_ALLOCATION_ERROR;
    450         return NULL;
    451     }
    452     bucketList->setDeleter(uprv_deleteUObject);
    453 
    454     // underflow bucket
    455     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
    456     if (bucket == NULL) {
    457         errorCode = U_MEMORY_ALLOCATION_ERROR;
    458         return NULL;
    459     }
    460     bucketList->addElement(bucket, errorCode);
    461     if (U_FAILURE(errorCode)) { return NULL; }
    462 
    463     UnicodeString temp;
    464 
    465     // fix up the list, adding underflow, additions, overflow
    466     // Insert inflow labels as needed.
    467     int32_t scriptIndex = -1;
    468     const UnicodeString *scriptUpperBoundary = &emptyString_;
    469     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
    470         UnicodeString &current = *getString(indexCharacters, i);
    471         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
    472             // We crossed the script boundary into a new script.
    473             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
    474             UBool skippedScript = FALSE;
    475             for (;;) {
    476                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
    477                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
    478                     break;
    479                 }
    480                 skippedScript = TRUE;
    481             }
    482             if (skippedScript && bucketList->size() > 1) {
    483                 // We are skipping one or more scripts,
    484                 // and we are not just getting out of the underflow label.
    485                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
    486                 if (bucket == NULL) {
    487                     errorCode = U_MEMORY_ALLOCATION_ERROR;
    488                     return NULL;
    489                 }
    490                 bucketList->addElement(bucket, errorCode);
    491             }
    492         }
    493         // Add a bucket with the current label.
    494         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
    495         if (bucket == NULL) {
    496             errorCode = U_MEMORY_ALLOCATION_ERROR;
    497             return NULL;
    498         }
    499         bucketList->addElement(bucket, errorCode);
    500         // Remember ASCII and Pinyin buckets for Pinyin redirects.
    501         UChar c;
    502         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
    503             asciiBuckets[c - 0x41] = bucket;
    504         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
    505                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
    506             pinyinBuckets[c - 0x41] = bucket;
    507             hasPinyin = TRUE;
    508         }
    509         // Check for multiple primary weights.
    510         if (!current.startsWith(BASE, BASE_LENGTH) &&
    511                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
    512                                           ces, errorCode) &&
    513                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
    514             // "AE-ligature" or "Sch" etc.
    515             for (int32_t i = bucketList->size() - 2;; --i) {
    516                 Bucket *singleBucket = getBucket(*bucketList, i);
    517                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
    518                     // There is no single-character bucket since the last
    519                     // underflow or inflow label.
    520                     break;
    521                 }
    522                 if (singleBucket->displayBucket_ == NULL &&
    523                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
    524                                                    singleBucket->lowerBoundary_,
    525                                                    ces, errorCode)) {
    526                     // Add an invisible bucket that redirects strings greater than the expansion
    527                     // to the previous single-character bucket.
    528                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
    529                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
    530                     bucket = new Bucket(emptyString_,
    531                         UnicodeString(current).append((UChar)0xFFFF),
    532                         U_ALPHAINDEX_NORMAL);
    533                     if (bucket == NULL) {
    534                         errorCode = U_MEMORY_ALLOCATION_ERROR;
    535                         return NULL;
    536                     }
    537                     bucket->displayBucket_ = singleBucket;
    538                     bucketList->addElement(bucket, errorCode);
    539                     hasInvisibleBuckets = TRUE;
    540                     break;
    541                 }
    542             }
    543         }
    544     }
    545     if (U_FAILURE(errorCode)) { return NULL; }
    546     if (bucketList->size() == 1) {
    547         // No real labels, show only the underflow label.
    548         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
    549         if (bl == NULL) {
    550             errorCode = U_MEMORY_ALLOCATION_ERROR;
    551             return NULL;
    552         }
    553         bucketList.orphan();
    554         return bl;
    555     }
    556     // overflow bucket
    557     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
    558     if (bucket == NULL) {
    559         errorCode = U_MEMORY_ALLOCATION_ERROR;
    560         return NULL;
    561     }
    562     bucketList->addElement(bucket, errorCode); // final
    563 
    564     if (hasPinyin) {
    565         // Redirect Pinyin buckets.
    566         Bucket *asciiBucket = NULL;
    567         for (int32_t i = 0; i < 26; ++i) {
    568             if (asciiBuckets[i] != NULL) {
    569                 asciiBucket = asciiBuckets[i];
    570             }
    571             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
    572                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
    573                 hasInvisibleBuckets = TRUE;
    574             }
    575         }
    576     }
    577 
    578     if (U_FAILURE(errorCode)) { return NULL; }
    579     if (!hasInvisibleBuckets) {
    580         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
    581         if (bl == NULL) {
    582             errorCode = U_MEMORY_ALLOCATION_ERROR;
    583             return NULL;
    584         }
    585         bucketList.orphan();
    586         return bl;
    587     }
    588     // Merge inflow buckets that are visually adjacent.
    589     // Iterate backwards: Merge inflow into overflow rather than the other way around.
    590     int32_t i = bucketList->size() - 1;
    591     Bucket *nextBucket = getBucket(*bucketList, i);
    592     while (--i > 0) {
    593         bucket = getBucket(*bucketList, i);
    594         if (bucket->displayBucket_ != NULL) {
    595             continue;  // skip invisible buckets
    596         }
    597         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
    598             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
    599                 bucket->displayBucket_ = nextBucket;
    600                 continue;
    601             }
    602         }
    603         nextBucket = bucket;
    604     }
    605 
    606     LocalPointer<UVector> publicBucketList(new UVector(errorCode));
    607     if (bucketList.isNull()) {
    608         errorCode = U_MEMORY_ALLOCATION_ERROR;
    609         return NULL;
    610     }
    611     // Do not call publicBucketList->setDeleter():
    612     // This vector shares its objects with the bucketList.
    613     for (int32_t i = 0; i < bucketList->size(); ++i) {
    614         bucket = getBucket(*bucketList, i);
    615         if (bucket->displayBucket_ == NULL) {
    616             publicBucketList->addElement(bucket, errorCode);
    617         }
    618     }
    619     if (U_FAILURE(errorCode)) { return NULL; }
    620     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
    621     if (bl == NULL) {
    622         errorCode = U_MEMORY_ALLOCATION_ERROR;
    623         return NULL;
    624     }
    625     bucketList.orphan();
    626     publicBucketList.orphan();
    627     return bl;
    628 }
    629 
    630 /**
    631  * Creates an index, and buckets and sorts the list of records into the index.
    632  */
    633 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
    634     if (U_FAILURE(errorCode) || buckets_ != NULL) {
    635         return;
    636     }
    637     buckets_ = createBucketList(errorCode);
    638     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
    639         return;
    640     }
    641 
    642     // Sort the records by name.
    643     // Stable sort preserves input order of collation duplicates.
    644     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
    645 
    646     // Now, we traverse all of the input, which is now sorted.
    647     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
    648     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
    649     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
    650     // we need to improve it for that case.
    651 
    652     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
    653     int32_t bucketIndex = 1;
    654     Bucket *nextBucket;
    655     const UnicodeString *upperBoundary;
    656     if (bucketIndex < buckets_->bucketList_->size()) {
    657         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
    658         upperBoundary = &nextBucket->lowerBoundary_;
    659     } else {
    660         nextBucket = NULL;
    661         upperBoundary = NULL;
    662     }
    663     for (int32_t i = 0; i < inputList_->size(); ++i) {
    664         Record *r = getRecord(*inputList_, i);
    665         // if the current bucket isn't the right one, find the one that is
    666         // We have a special flag for the last bucket so that we don't look any further
    667         while (upperBoundary != NULL &&
    668                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
    669             currentBucket = nextBucket;
    670             // now reset the boundary that we compare against
    671             if (bucketIndex < buckets_->bucketList_->size()) {
    672                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
    673                 upperBoundary = &nextBucket->lowerBoundary_;
    674             } else {
    675                 upperBoundary = NULL;
    676             }
    677         }
    678         // now put the record into the bucket.
    679         Bucket *bucket = currentBucket;
    680         if (bucket->displayBucket_ != NULL) {
    681             bucket = bucket->displayBucket_;
    682         }
    683         if (bucket->records_ == NULL) {
    684             bucket->records_ = new UVector(errorCode);
    685             if (bucket->records_ == NULL) {
    686                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    687                 return;
    688             }
    689         }
    690         bucket->records_->addElement(r, errorCode);
    691     }
    692 }
    693 
    694 void AlphabeticIndex::clearBuckets() {
    695     if (buckets_ != NULL) {
    696         delete buckets_;
    697         buckets_ = NULL;
    698         internalResetBucketIterator();
    699     }
    700 }
    701 
    702 void AlphabeticIndex::internalResetBucketIterator() {
    703     labelsIterIndex_ = -1;
    704     currentBucket_ = NULL;
    705 }
    706 
    707 
    708 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
    709     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
    710     if (U_FAILURE(status)) {
    711         return;
    712     }
    713 
    714     UnicodeSet exemplars;
    715     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
    716     if (U_SUCCESS(status)) {
    717         initialLabels_->addAll(exemplars);
    718         return;
    719     }
    720     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
    721 
    722     // The locale data did not include explicit Index characters.
    723     // Synthesize a set of them from the locale's standard exemplar characters.
    724     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
    725     if (U_FAILURE(status)) {
    726         return;
    727     }
    728 
    729     // question: should we add auxiliary exemplars?
    730     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
    731         exemplars.add(0x61, 0x7A);
    732     }
    733     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
    734         // cut down to small list
    735         exemplars.remove(0xAC00, 0xD7A3).
    736             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
    737             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
    738             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
    739             add(0xD30C).add(0xD558);
    740     }
    741     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
    742         // cut down to small list
    743         // make use of the fact that Ethiopic is allocated in 8's, where
    744         // the base is 0 mod 8.
    745         UnicodeSet ethiopic(
    746             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
    747         UnicodeSetIterator it(ethiopic);
    748         while (it.next() && !it.isString()) {
    749             if ((it.getCodepoint() & 0x7) != 0) {
    750                 exemplars.remove(it.getCodepoint());
    751             }
    752         }
    753     }
    754 
    755     // Upper-case any that aren't already so.
    756     //   (We only do this for synthesized index characters.)
    757     UnicodeSetIterator it(exemplars);
    758     UnicodeString upperC;
    759     while (it.next()) {
    760         const UnicodeString &exemplarC = it.getString();
    761         upperC = exemplarC;
    762         upperC.toUpper(locale);
    763         initialLabels_->add(upperC);
    764     }
    765 }
    766 
    767 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
    768     UnicodeSet contractions;
    769     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
    770     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
    771     initialLabels_->addAll(contractions);
    772     UnicodeSetIterator iter(contractions);
    773     while (iter.next()) {
    774         const UnicodeString &s = iter.getString();
    775         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
    776         UChar c = s.charAt(s.length() - 1);
    777         if (0x41 <= c && c <= 0x5A) {  // A-Z
    778             // There are Pinyin labels, add ASCII A-Z labels as well.
    779             initialLabels_->add(0x41, 0x5A);  // A-Z
    780             break;
    781         }
    782     }
    783     return TRUE;
    784 }
    785 
    786 
    787 /*
    788  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
    789  */
    790 static const UChar CGJ = 0x034F;
    791 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
    792     UnicodeString result;
    793     if (item.length() == 0) {
    794         return result;
    795     }
    796     int32_t i = 0;
    797     for (;;) {
    798         UChar32  cp = item.char32At(i);
    799         result.append(cp);
    800         i = item.moveIndex32(i, 1);
    801         if (i >= item.length()) {
    802             break;
    803         }
    804         result.append(CGJ);
    805     }
    806     return result;
    807 }
    808 
    809 
    810 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
    811     return FALSE;
    812 }
    813 
    814 
    815 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
    816     return FALSE;
    817 }
    818 
    819 
    820 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
    821     return *collator_;
    822 }
    823 
    824 
    825 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
    826     return inflowLabel_;
    827 }
    828 
    829 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
    830     return overflowLabel_;
    831 }
    832 
    833 
    834 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
    835     return underflowLabel_;
    836 }
    837 
    838 
    839 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    840     inflowLabel_ = label;
    841     clearBuckets();
    842     return *this;
    843 }
    844 
    845 
    846 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    847     overflowLabel_ = label;
    848     clearBuckets();
    849     return *this;
    850 }
    851 
    852 
    853 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    854     underflowLabel_ = label;
    855     clearBuckets();
    856     return *this;
    857 }
    858 
    859 
    860 int32_t AlphabeticIndex::getMaxLabelCount() const {
    861     return maxLabelCount_;
    862 }
    863 
    864 
    865 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
    866     if (U_FAILURE(status)) {
    867         return *this;
    868     }
    869     if (maxLabelCount <= 0) {
    870         status = U_ILLEGAL_ARGUMENT_ERROR;
    871         return *this;
    872     }
    873     maxLabelCount_ = maxLabelCount;
    874     clearBuckets();
    875     return *this;
    876 }
    877 
    878 
    879 //
    880 //  init() - Common code for constructors.
    881 //
    882 
    883 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
    884     if (U_FAILURE(status)) { return; }
    885     if (locale == NULL && collator_ == NULL) {
    886         status = U_ILLEGAL_ARGUMENT_ERROR;
    887         return;
    888     }
    889 
    890     initialLabels_         = new UnicodeSet();
    891     if (initialLabels_ == NULL) {
    892         status = U_MEMORY_ALLOCATION_ERROR;
    893         return;
    894     }
    895 
    896     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
    897     overflowLabel_ = inflowLabel_;
    898     underflowLabel_ = inflowLabel_;
    899 
    900     if (collator_ == NULL) {
    901         Collator *coll = Collator::createInstance(*locale, status);
    902         if (U_FAILURE(status)) {
    903             delete coll;
    904             return;
    905         }
    906         if (coll == NULL) {
    907             status = U_MEMORY_ALLOCATION_ERROR;
    908             return;
    909         }
    910         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
    911         if (collator_ == NULL) {
    912             delete coll;
    913             status = U_UNSUPPORTED_ERROR;
    914             return;
    915         }
    916     }
    917     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
    918     if (collatorPrimaryOnly_ == NULL) {
    919         status = U_MEMORY_ALLOCATION_ERROR;
    920         return;
    921     }
    922     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
    923     firstCharsInScripts_ = firstStringsInScript(status);
    924     if (U_FAILURE(status)) { return; }
    925     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
    926     // Guard against a degenerate collator where
    927     // some script boundary strings are primary ignorable.
    928     for (;;) {
    929         if (U_FAILURE(status)) { return; }
    930         if (firstCharsInScripts_->isEmpty()) {
    931             // AlphabeticIndex requires some non-ignorable script boundary strings.
    932             status = U_ILLEGAL_ARGUMENT_ERROR;
    933             return;
    934         }
    935         if (collatorPrimaryOnly_->compare(
    936                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
    937                 emptyString_, status) == UCOL_EQUAL) {
    938             firstCharsInScripts_->removeElementAt(0);
    939         } else {
    940             break;
    941         }
    942     }
    943 
    944     // Chinese index characters, which are specific to each of the several Chinese tailorings,
    945     // take precedence over the single locale data exemplar set per language.
    946     if (!addChineseIndexCharacters(status) && locale != NULL) {
    947         addIndexExemplars(*locale, status);
    948     }
    949 }
    950 
    951 
    952 //
    953 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
    954 //
    955 static int32_t U_CALLCONV
    956 collatorComparator(const void *context, const void *left, const void *right) {
    957     const UElement *leftElement = static_cast<const UElement *>(left);
    958     const UElement *rightElement = static_cast<const UElement *>(right);
    959     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
    960     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
    961 
    962     if (leftString == rightString) {
    963         // Catches case where both are NULL
    964         return 0;
    965     }
    966     if (leftString == NULL) {
    967         return 1;
    968     };
    969     if (rightString == NULL) {
    970         return -1;
    971     }
    972     const Collator *col = static_cast<const Collator *>(context);
    973     UErrorCode errorCode = U_ZERO_ERROR;
    974     return col->compare(*leftString, *rightString, errorCode);
    975 }
    976 
    977 //
    978 //  Comparison function for UVector<Record *> sorting with a collator.
    979 //
    980 static int32_t U_CALLCONV
    981 recordCompareFn(const void *context, const void *left, const void *right) {
    982     const UElement *leftElement = static_cast<const UElement *>(left);
    983     const UElement *rightElement = static_cast<const UElement *>(right);
    984     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
    985     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
    986     const Collator *col = static_cast<const Collator *>(context);
    987     UErrorCode errorCode = U_ZERO_ERROR;
    988     return col->compare(leftRec->name_, rightRec->name_, errorCode);
    989 }
    990 
    991 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    992     if (U_FAILURE(status)) {
    993         return NULL;
    994     }
    995     LocalPointer<UVector> dest(new UVector(status));
    996     if (dest.isNull()) {
    997         status = U_MEMORY_ALLOCATION_ERROR;
    998         return NULL;
    999     }
   1000     dest->setDeleter(uprv_deleteUObject);
   1001     // Fetch the script-first-primary contractions which are defined in the root collator.
   1002     // They all start with U+FDD1.
   1003     UnicodeSet set;
   1004     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
   1005     if (U_FAILURE(status)) {
   1006         return NULL;
   1007     }
   1008     if (set.isEmpty()) {
   1009         status = U_UNSUPPORTED_ERROR;
   1010         return NULL;
   1011     }
   1012     UnicodeSetIterator iter(set);
   1013     while (iter.next()) {
   1014         const UnicodeString &boundary = iter.getString();
   1015         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
   1016         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
   1017             // Ignore boundaries for the special reordering groups.
   1018             // Take only those for "real scripts" (where the sample character is a Letter,
   1019             // and the one for unassigned implicit weights (Cn).
   1020             continue;
   1021         }
   1022         UnicodeString *s = new UnicodeString(boundary);
   1023         if (s == NULL) {
   1024             status = U_MEMORY_ALLOCATION_ERROR;
   1025             return NULL;
   1026         }
   1027         dest->addElement(s, status);
   1028     }
   1029     return dest.orphan();
   1030 }
   1031 
   1032 
   1033 namespace {
   1034 
   1035 /**
   1036  * Returns true if one index character string is "better" than the other.
   1037  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
   1038  * better, and otherwise binary-less-than is better.
   1039  */
   1040 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
   1041                                 const UnicodeString &one, const UnicodeString &other) {
   1042     // This is called with primary-equal strings, but never with one.equals(other).
   1043     UErrorCode status = U_ZERO_ERROR;
   1044     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
   1045     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
   1046     if (U_FAILURE(status)) { return FALSE; }
   1047     int32_t result = n1.countChar32() - n2.countChar32();
   1048     if (result != 0) {
   1049         return result < 0;
   1050     }
   1051     result = n1.compareCodePointOrder(n2);
   1052     if (result != 0) {
   1053         return result < 0;
   1054     }
   1055     return one.compareCodePointOrder(other) < 0;
   1056 }
   1057 
   1058 }  // namespace
   1059 
   1060 //
   1061 //  Constructor & Destructor for AlphabeticIndex::Record
   1062 //
   1063 //     Records are internal only, instances are not directly surfaced in the public API.
   1064 //     This class is mostly struct-like, with all public fields.
   1065 
   1066 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
   1067         : name_(name), data_(data) {}
   1068 
   1069 AlphabeticIndex::Record::~Record() {
   1070 }
   1071 
   1072 
   1073 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
   1074     if (U_FAILURE(status)) {
   1075         return *this;
   1076     }
   1077     if (inputList_ == NULL) {
   1078         inputList_ = new UVector(status);
   1079         if (inputList_ == NULL) {
   1080             status = U_MEMORY_ALLOCATION_ERROR;
   1081             return *this;
   1082         }
   1083         inputList_->setDeleter(alphaIndex_deleteRecord);
   1084     }
   1085     Record *r = new Record(name, data);
   1086     if (r == NULL) {
   1087         status = U_MEMORY_ALLOCATION_ERROR;
   1088         return *this;
   1089     }
   1090     inputList_->addElement(r, status);
   1091     clearBuckets();
   1092     //std::string ss;
   1093     //std::string ss2;
   1094     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
   1095     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
   1096     return *this;
   1097 }
   1098 
   1099 
   1100 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
   1101     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
   1102         inputList_->removeAllElements();
   1103         clearBuckets();
   1104     }
   1105     return *this;
   1106 }
   1107 
   1108 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
   1109     initBuckets(status);
   1110     if (U_FAILURE(status)) {
   1111         return 0;
   1112     }
   1113     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
   1114 }
   1115 
   1116 
   1117 int32_t AlphabeticIndex::getBucketIndex() const {
   1118     return labelsIterIndex_;
   1119 }
   1120 
   1121 
   1122 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
   1123     if (U_FAILURE(status)) {
   1124         return FALSE;
   1125     }
   1126     if (buckets_ == NULL && currentBucket_ != NULL) {
   1127         status = U_ENUM_OUT_OF_SYNC_ERROR;
   1128         return FALSE;
   1129     }
   1130     initBuckets(status);
   1131     if (U_FAILURE(status)) {
   1132         return FALSE;
   1133     }
   1134     ++labelsIterIndex_;
   1135     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
   1136         labelsIterIndex_ = buckets_->getBucketCount();
   1137         return FALSE;
   1138     }
   1139     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
   1140     resetRecordIterator();
   1141     return TRUE;
   1142 }
   1143 
   1144 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
   1145     if (currentBucket_ != NULL) {
   1146         return currentBucket_->label_;
   1147     } else {
   1148         return emptyString_;
   1149     }
   1150 }
   1151 
   1152 
   1153 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
   1154     if (currentBucket_ != NULL) {
   1155         return currentBucket_->labelType_;
   1156     } else {
   1157         return U_ALPHAINDEX_NORMAL;
   1158     }
   1159 }
   1160 
   1161 
   1162 int32_t AlphabeticIndex::getBucketRecordCount() const {
   1163     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
   1164         return currentBucket_->records_->size();
   1165     } else {
   1166         return 0;
   1167     }
   1168 }
   1169 
   1170 
   1171 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
   1172     if (U_FAILURE(status)) {
   1173         return *this;
   1174     }
   1175     internalResetBucketIterator();
   1176     return *this;
   1177 }
   1178 
   1179 
   1180 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
   1181     if (U_FAILURE(status)) {
   1182         return FALSE;
   1183     }
   1184     if (currentBucket_ == NULL) {
   1185         // We are trying to iterate over the items in a bucket, but there is no
   1186         // current bucket from the enumeration of buckets.
   1187         status = U_INVALID_STATE_ERROR;
   1188         return FALSE;
   1189     }
   1190     if (buckets_ == NULL) {
   1191         status = U_ENUM_OUT_OF_SYNC_ERROR;
   1192         return FALSE;
   1193     }
   1194     if (currentBucket_->records_ == NULL) {
   1195         return FALSE;
   1196     }
   1197     ++itemsIterIndex_;
   1198     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
   1199         itemsIterIndex_  = currentBucket_->records_->size();
   1200         return FALSE;
   1201     }
   1202     return TRUE;
   1203 }
   1204 
   1205 
   1206 const UnicodeString &AlphabeticIndex::getRecordName() const {
   1207     const UnicodeString *retStr = &emptyString_;
   1208     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
   1209         itemsIterIndex_ >= 0 &&
   1210         itemsIterIndex_ < currentBucket_->records_->size()) {
   1211             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
   1212             retStr = &item->name_;
   1213     }
   1214     return *retStr;
   1215 }
   1216 
   1217 const void *AlphabeticIndex::getRecordData() const {
   1218     const void *retPtr = NULL;
   1219     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
   1220         itemsIterIndex_ >= 0 &&
   1221         itemsIterIndex_ < currentBucket_->records_->size()) {
   1222             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
   1223             retPtr = item->data_;
   1224     }
   1225     return retPtr;
   1226 }
   1227 
   1228 
   1229 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
   1230     itemsIterIndex_ = -1;
   1231     return *this;
   1232 }
   1233 
   1234 
   1235 
   1236 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
   1237                                 const UnicodeString &lowerBoundary,
   1238                                 UAlphabeticIndexLabelType type)
   1239         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
   1240           displayBucket_(NULL), displayIndex_(-1),
   1241           records_(NULL) {
   1242 }
   1243 
   1244 
   1245 AlphabeticIndex::Bucket::~Bucket() {
   1246     delete records_;
   1247 }
   1248 
   1249 U_NAMESPACE_END
   1250 
   1251 #endif  // !UCONFIG_NO_COLLATION
   1252