Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 2009-2014, International Business Machines Corporation and
      4 * others. All Rights Reserved.
      5 *******************************************************************************
      6 */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #if !UCONFIG_NO_COLLATION
     11 
     12 #include "unicode/alphaindex.h"
     13 #include "unicode/coll.h"
     14 #include "unicode/localpointer.h"
     15 #include "unicode/normalizer2.h"
     16 #include "unicode/tblcoll.h"
     17 #include "unicode/uchar.h"
     18 #include "unicode/ulocdata.h"
     19 #include "unicode/uniset.h"
     20 #include "unicode/uobject.h"
     21 #include "unicode/usetiter.h"
     22 #include "unicode/utf16.h"
     23 
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "uassert.h"
     27 #include "uvector.h"
     28 #include "uvectr64.h"
     29 
     30 //#include <string>
     31 //#include <iostream>
     32 
     33 U_NAMESPACE_BEGIN
     34 
     35 namespace {
     36 
     37 /**
     38  * Prefix string for Chinese index buckets.
     39  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
     40  */
     41 const UChar BASE[1] = { 0xFDD0 };
     42 const int32_t BASE_LENGTH = 1;
     43 
     44 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
     45                                 const UnicodeString &one, const UnicodeString &other);
     46 
     47 }  // namespace
     48 
     49 static int32_t U_CALLCONV
     50 collatorComparator(const void *context, const void *left, const void *right);
     51 
     52 static int32_t U_CALLCONV
     53 recordCompareFn(const void *context, const void *left, const void *right);
     54 
     55 //  UVector<Record *> support function, delete a Record.
     56 static void U_CALLCONV
     57 alphaIndex_deleteRecord(void *obj) {
     58     delete static_cast<AlphabeticIndex::Record *>(obj);
     59 }
     60 
     61 namespace {
     62 
     63 UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
     64                            UErrorCode &errorCode) {
     65     if (U_FAILURE(errorCode)) { return NULL; }
     66     if (owned.isValid()) {
     67         return owned.orphan();
     68     }
     69     UnicodeString *p = new UnicodeString(s);
     70     if (p == NULL) {
     71         errorCode = U_MEMORY_ALLOCATION_ERROR;
     72     }
     73     return p;
     74 }
     75 
     76 inline UnicodeString *getString(const UVector &list, int32_t i) {
     77     return static_cast<UnicodeString *>(list[i]);
     78 }
     79 
     80 inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
     81     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
     82 }
     83 
     84 inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
     85     return static_cast<AlphabeticIndex::Record *>(list[i]);
     86 }
     87 
     88 /**
     89  * Like Java Collections.binarySearch(List, String, Comparator).
     90  *
     91  * @return the index>=0 where the item was found,
     92  *         or the index<0 for inserting the string at ~index in sorted order
     93  */
     94 int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
     95     if (list.size() == 0) { return ~0; }
     96     int32_t start = 0;
     97     int32_t limit = list.size();
     98     for (;;) {
     99         int32_t i = (start + limit) / 2;
    100         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
    101         UErrorCode errorCode = U_ZERO_ERROR;
    102         UCollationResult cmp = coll.compare(s, *si, errorCode);
    103         if (cmp == UCOL_EQUAL) {
    104             return i;
    105         } else if (cmp < 0) {
    106             if (i == start) {
    107                 return ~start;  // insert s before *si
    108             }
    109             limit = i;
    110         } else {
    111             if (i == start) {
    112                 return ~(start + 1);  // insert s after *si
    113             }
    114             start = i;
    115         }
    116     }
    117 }
    118 
    119 }  // namespace
    120 
    121 // The BucketList is not in the anonymous namespace because only Clang
    122 // seems to support its use in other classes from there.
    123 // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
    124 class BucketList : public UObject {
    125 public:
    126     BucketList(UVector *bucketList, UVector *publicBucketList)
    127             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
    128         int32_t displayIndex = 0;
    129         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
    130             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
    131         }
    132     }
    133 
    134     // The virtual destructor must not be inline.
    135     // See ticket #8454 for details.
    136     virtual ~BucketList();
    137 
    138     int32_t getBucketCount() const {
    139         return immutableVisibleList_->size();
    140     }
    141 
    142     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
    143                            UErrorCode &errorCode) {
    144         // binary search
    145         int32_t start = 0;
    146         int32_t limit = bucketList_->size();
    147         while ((start + 1) < limit) {
    148             int32_t i = (start + limit) / 2;
    149             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
    150             UCollationResult nameVsBucket =
    151                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
    152             if (nameVsBucket < 0) {
    153                 limit = i;
    154             } else {
    155                 start = i;
    156             }
    157         }
    158         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
    159         if (bucket->displayBucket_ != NULL) {
    160             bucket = bucket->displayBucket_;
    161         }
    162         return bucket->displayIndex_;
    163     }
    164 
    165     /** All of the buckets, visible and invisible. */
    166     UVector *bucketList_;
    167     /** Just the visible buckets. */
    168     UVector *immutableVisibleList_;
    169 };
    170 
    171 BucketList::~BucketList() {
    172     delete bucketList_;
    173     if (immutableVisibleList_ != bucketList_) {
    174         delete immutableVisibleList_;
    175     }
    176 }
    177 
    178 AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
    179     delete buckets_;
    180     delete collatorPrimaryOnly_;
    181 }
    182 
    183 int32_t
    184 AlphabeticIndex::ImmutableIndex::getBucketCount() const {
    185     return buckets_->getBucketCount();
    186 }
    187 
    188 int32_t
    189 AlphabeticIndex::ImmutableIndex::getBucketIndex(
    190         const UnicodeString &name, UErrorCode &errorCode) const {
    191     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
    192 }
    193 
    194 const AlphabeticIndex::Bucket *
    195 AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
    196     if (0 <= index && index < buckets_->getBucketCount()) {
    197         return icu::getBucket(*buckets_->immutableVisibleList_, index);
    198     } else {
    199         return NULL;
    200     }
    201 }
    202 
    203 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
    204         : inputList_(NULL),
    205           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
    206           maxLabelCount_(99),
    207           initialLabels_(NULL), firstCharsInScripts_(NULL),
    208           collator_(NULL), collatorPrimaryOnly_(NULL),
    209           buckets_(NULL) {
    210     init(&locale, status);
    211 }
    212 
    213 
    214 AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
    215         : inputList_(NULL),
    216           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
    217           maxLabelCount_(99),
    218           initialLabels_(NULL), firstCharsInScripts_(NULL),
    219           collator_(collator), collatorPrimaryOnly_(NULL),
    220           buckets_(NULL) {
    221     init(NULL, status);
    222 }
    223 
    224 
    225 
    226 AlphabeticIndex::~AlphabeticIndex() {
    227     delete collator_;
    228     delete collatorPrimaryOnly_;
    229     delete firstCharsInScripts_;
    230     delete buckets_;
    231     delete inputList_;
    232     delete initialLabels_;
    233 }
    234 
    235 
    236 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
    237     if (U_FAILURE(status)) {
    238         return *this;
    239     }
    240     initialLabels_->addAll(additions);
    241     clearBuckets();
    242     return *this;
    243 }
    244 
    245 
    246 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
    247     addIndexExemplars(locale, status);
    248     clearBuckets();
    249     return *this;
    250 }
    251 
    252 
    253 AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
    254     if (U_FAILURE(errorCode)) { return NULL; }
    255     // In C++, the ImmutableIndex must own its copy of the BucketList,
    256     // even if it contains no records, for proper memory management.
    257     // We could clone the buckets_ if they are not NULL,
    258     // but that would be worth it only if this method is called multiple times,
    259     // or called after using the old-style bucket iterator API.
    260     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
    261     LocalPointer<RuleBasedCollator> coll(
    262         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
    263     if (immutableBucketList.isNull() || coll.isNull()) {
    264         errorCode = U_MEMORY_ALLOCATION_ERROR;
    265         return NULL;
    266     }
    267     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
    268     if (immIndex == NULL) {
    269         errorCode = U_MEMORY_ALLOCATION_ERROR;
    270         return NULL;
    271     }
    272     // The ImmutableIndex adopted its parameter objects.
    273     immutableBucketList.orphan();
    274     coll.orphan();
    275     return immIndex;
    276 }
    277 
    278 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
    279     initBuckets(status);
    280     if (U_FAILURE(status)) {
    281         return 0;
    282     }
    283     return buckets_->getBucketCount();
    284 }
    285 
    286 
    287 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
    288     if (U_FAILURE(status) || inputList_ == NULL) {
    289         return 0;
    290     }
    291     return inputList_->size();
    292 }
    293 
    294 void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
    295     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
    296     if (U_FAILURE(errorCode)) { return; }
    297 
    298     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
    299     const UnicodeString &overflowBoundary =
    300         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
    301 
    302     // We make a sorted array of elements.
    303     // Some of the input may be redundant.
    304     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
    305     // We filter out those cases.
    306     UnicodeSetIterator iter(*initialLabels_);
    307     while (iter.next()) {
    308         const UnicodeString *item = &iter.getString();
    309         LocalPointer<UnicodeString> ownedItem;
    310         UBool checkDistinct;
    311         int32_t itemLength = item->length();
    312         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
    313             checkDistinct = FALSE;
    314         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
    315                 item->charAt(itemLength - 2) != 0x2a) {
    316             // Use a label if it is marked with one trailing star,
    317             // even if the label string sorts the same when all contractions are suppressed.
    318             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
    319             item = ownedItem.getAlias();
    320             if (item == NULL) {
    321                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    322                 return;
    323             }
    324             checkDistinct = FALSE;
    325         } else {
    326             checkDistinct = TRUE;
    327         }
    328         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
    329             // Ignore a primary-ignorable or non-alphabetic index character.
    330         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
    331             // Ignore an index character that will land in the overflow bucket.
    332         } else if (checkDistinct &&
    333                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
    334             // Ignore a multi-code point index character that does not sort distinctly
    335             // from the sequence of its separate characters.
    336         } else {
    337             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
    338             if (insertionPoint < 0) {
    339                 indexCharacters.insertElementAt(
    340                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
    341             } else {
    342                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
    343                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
    344                     indexCharacters.setElementAt(
    345                         ownedString(*item, ownedItem, errorCode), insertionPoint);
    346                 }
    347             }
    348         }
    349     }
    350     if (U_FAILURE(errorCode)) { return; }
    351 
    352     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
    353 
    354     int32_t size = indexCharacters.size() - 1;
    355     if (size > maxLabelCount_) {
    356         int32_t count = 0;
    357         int32_t old = -1;
    358         for (int32_t i = 0; i < indexCharacters.size();) {
    359             ++count;
    360             int32_t bump = count * maxLabelCount_ / size;
    361             if (bump == old) {
    362                 indexCharacters.removeElementAt(i);
    363             } else {
    364                 old = bump;
    365                 ++i;
    366             }
    367         }
    368     }
    369 }
    370 
    371 namespace {
    372 
    373 const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
    374     if (!current.startsWith(BASE, BASE_LENGTH)) {
    375         return current;
    376     }
    377     UChar rest = current.charAt(BASE_LENGTH);
    378     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
    379         int32_t count = rest-0x2800;
    380         temp.setTo((UChar)(0x30 + count % 10));
    381         if (count >= 10) {
    382             count /= 10;
    383             temp.insert(0, (UChar)(0x30 + count % 10));
    384             if (count >= 10) {
    385                 count /= 10;
    386                 temp.insert(0, (UChar)(0x30 + count));
    387             }
    388         }
    389         return temp.append((UChar)0x5283);
    390     }
    391     return temp.setTo(current, BASE_LENGTH);
    392 }
    393 
    394 UBool hasMultiplePrimaryWeights(
    395         const RuleBasedCollator &coll, uint32_t variableTop,
    396         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
    397     ces.removeAllElements();
    398     coll.internalGetCEs(s, ces, errorCode);
    399     if (U_FAILURE(errorCode)) { return FALSE; }
    400     UBool seenPrimary = FALSE;
    401     for (int32_t i = 0; i < ces.size(); ++i) {
    402         int64_t ce = ces.elementAti(i);
    403         uint32_t p = (uint32_t)(ce >> 32);
    404         if (p > variableTop) {
    405             // not primary ignorable
    406             if (seenPrimary) {
    407                 return TRUE;
    408             }
    409             seenPrimary = TRUE;
    410         }
    411     }
    412     return FALSE;
    413 }
    414 
    415 }  // namespace
    416 
    417 BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
    418     // Initialize indexCharacters.
    419     UVector indexCharacters(errorCode);
    420     indexCharacters.setDeleter(uprv_deleteUObject);
    421     initLabels(indexCharacters, errorCode);
    422     if (U_FAILURE(errorCode)) { return NULL; }
    423 
    424     // Variables for hasMultiplePrimaryWeights().
    425     UVector64 ces(errorCode);
    426     uint32_t variableTop;
    427     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
    428         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
    429     } else {
    430         variableTop = 0;
    431     }
    432     UBool hasInvisibleBuckets = FALSE;
    433 
    434     // Helper arrays for Chinese Pinyin collation.
    435     Bucket *asciiBuckets[26] = {
    436         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    437         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    438     };
    439     Bucket *pinyinBuckets[26] = {
    440         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    441         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
    442     };
    443     UBool hasPinyin = FALSE;
    444 
    445     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
    446     if (U_FAILURE(errorCode)) {
    447         return NULL;
    448     }
    449     bucketList->setDeleter(uprv_deleteUObject);
    450 
    451     // underflow bucket
    452     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
    453     if (bucket == NULL) {
    454         errorCode = U_MEMORY_ALLOCATION_ERROR;
    455         return NULL;
    456     }
    457     bucketList->addElement(bucket, errorCode);
    458     if (U_FAILURE(errorCode)) { return NULL; }
    459 
    460     UnicodeString temp;
    461 
    462     // fix up the list, adding underflow, additions, overflow
    463     // Insert inflow labels as needed.
    464     int32_t scriptIndex = -1;
    465     const UnicodeString *scriptUpperBoundary = &emptyString_;
    466     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
    467         UnicodeString &current = *getString(indexCharacters, i);
    468         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
    469             // We crossed the script boundary into a new script.
    470             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
    471             UBool skippedScript = FALSE;
    472             for (;;) {
    473                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
    474                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
    475                     break;
    476                 }
    477                 skippedScript = TRUE;
    478             }
    479             if (skippedScript && bucketList->size() > 1) {
    480                 // We are skipping one or more scripts,
    481                 // and we are not just getting out of the underflow label.
    482                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
    483                 if (bucket == NULL) {
    484                     errorCode = U_MEMORY_ALLOCATION_ERROR;
    485                     return NULL;
    486                 }
    487                 bucketList->addElement(bucket, errorCode);
    488             }
    489         }
    490         // Add a bucket with the current label.
    491         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
    492         if (bucket == NULL) {
    493             errorCode = U_MEMORY_ALLOCATION_ERROR;
    494             return NULL;
    495         }
    496         bucketList->addElement(bucket, errorCode);
    497         // Remember ASCII and Pinyin buckets for Pinyin redirects.
    498         UChar c;
    499         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
    500             asciiBuckets[c - 0x41] = bucket;
    501         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
    502                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
    503             pinyinBuckets[c - 0x41] = bucket;
    504             hasPinyin = TRUE;
    505         }
    506         // Check for multiple primary weights.
    507         if (!current.startsWith(BASE, BASE_LENGTH) &&
    508                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
    509                                           ces, errorCode) &&
    510                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
    511             // "AE-ligature" or "Sch" etc.
    512             for (int32_t i = bucketList->size() - 2;; --i) {
    513                 Bucket *singleBucket = getBucket(*bucketList, i);
    514                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
    515                     // There is no single-character bucket since the last
    516                     // underflow or inflow label.
    517                     break;
    518                 }
    519                 if (singleBucket->displayBucket_ == NULL &&
    520                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
    521                                                    singleBucket->lowerBoundary_,
    522                                                    ces, errorCode)) {
    523                     // Add an invisible bucket that redirects strings greater than the expansion
    524                     // to the previous single-character bucket.
    525                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
    526                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
    527                     bucket = new Bucket(emptyString_,
    528                         UnicodeString(current).append((UChar)0xFFFF),
    529                         U_ALPHAINDEX_NORMAL);
    530                     if (bucket == NULL) {
    531                         errorCode = U_MEMORY_ALLOCATION_ERROR;
    532                         return NULL;
    533                     }
    534                     bucket->displayBucket_ = singleBucket;
    535                     bucketList->addElement(bucket, errorCode);
    536                     hasInvisibleBuckets = TRUE;
    537                     break;
    538                 }
    539             }
    540         }
    541     }
    542     if (U_FAILURE(errorCode)) { return NULL; }
    543     if (bucketList->size() == 1) {
    544         // No real labels, show only the underflow label.
    545         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
    546         if (bl == NULL) {
    547             errorCode = U_MEMORY_ALLOCATION_ERROR;
    548             return NULL;
    549         }
    550         bucketList.orphan();
    551         return bl;
    552     }
    553     // overflow bucket
    554     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
    555     if (bucket == NULL) {
    556         errorCode = U_MEMORY_ALLOCATION_ERROR;
    557         return NULL;
    558     }
    559     bucketList->addElement(bucket, errorCode); // final
    560 
    561     if (hasPinyin) {
    562         // Redirect Pinyin buckets.
    563         Bucket *asciiBucket = NULL;
    564         for (int32_t i = 0; i < 26; ++i) {
    565             if (asciiBuckets[i] != NULL) {
    566                 asciiBucket = asciiBuckets[i];
    567             }
    568             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
    569                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
    570                 hasInvisibleBuckets = TRUE;
    571             }
    572         }
    573     }
    574 
    575     if (U_FAILURE(errorCode)) { return NULL; }
    576     if (!hasInvisibleBuckets) {
    577         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
    578         if (bl == NULL) {
    579             errorCode = U_MEMORY_ALLOCATION_ERROR;
    580             return NULL;
    581         }
    582         bucketList.orphan();
    583         return bl;
    584     }
    585     // Merge inflow buckets that are visually adjacent.
    586     // Iterate backwards: Merge inflow into overflow rather than the other way around.
    587     int32_t i = bucketList->size() - 1;
    588     Bucket *nextBucket = getBucket(*bucketList, i);
    589     while (--i > 0) {
    590         bucket = getBucket(*bucketList, i);
    591         if (bucket->displayBucket_ != NULL) {
    592             continue;  // skip invisible buckets
    593         }
    594         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
    595             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
    596                 bucket->displayBucket_ = nextBucket;
    597                 continue;
    598             }
    599         }
    600         nextBucket = bucket;
    601     }
    602 
    603     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
    604     if (U_FAILURE(errorCode)) {
    605         return NULL;
    606     }
    607     // Do not call publicBucketList->setDeleter():
    608     // This vector shares its objects with the bucketList.
    609     for (int32_t i = 0; i < bucketList->size(); ++i) {
    610         bucket = getBucket(*bucketList, i);
    611         if (bucket->displayBucket_ == NULL) {
    612             publicBucketList->addElement(bucket, errorCode);
    613         }
    614     }
    615     if (U_FAILURE(errorCode)) { return NULL; }
    616     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
    617     if (bl == NULL) {
    618         errorCode = U_MEMORY_ALLOCATION_ERROR;
    619         return NULL;
    620     }
    621     bucketList.orphan();
    622     publicBucketList.orphan();
    623     return bl;
    624 }
    625 
    626 /**
    627  * Creates an index, and buckets and sorts the list of records into the index.
    628  */
    629 void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
    630     if (U_FAILURE(errorCode) || buckets_ != NULL) {
    631         return;
    632     }
    633     buckets_ = createBucketList(errorCode);
    634     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
    635         return;
    636     }
    637 
    638     // Sort the records by name.
    639     // Stable sort preserves input order of collation duplicates.
    640     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
    641 
    642     // Now, we traverse all of the input, which is now sorted.
    643     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
    644     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
    645     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
    646     // we need to improve it for that case.
    647 
    648     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
    649     int32_t bucketIndex = 1;
    650     Bucket *nextBucket;
    651     const UnicodeString *upperBoundary;
    652     if (bucketIndex < buckets_->bucketList_->size()) {
    653         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
    654         upperBoundary = &nextBucket->lowerBoundary_;
    655     } else {
    656         nextBucket = NULL;
    657         upperBoundary = NULL;
    658     }
    659     for (int32_t i = 0; i < inputList_->size(); ++i) {
    660         Record *r = getRecord(*inputList_, i);
    661         // if the current bucket isn't the right one, find the one that is
    662         // We have a special flag for the last bucket so that we don't look any further
    663         while (upperBoundary != NULL &&
    664                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
    665             currentBucket = nextBucket;
    666             // now reset the boundary that we compare against
    667             if (bucketIndex < buckets_->bucketList_->size()) {
    668                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
    669                 upperBoundary = &nextBucket->lowerBoundary_;
    670             } else {
    671                 upperBoundary = NULL;
    672             }
    673         }
    674         // now put the record into the bucket.
    675         Bucket *bucket = currentBucket;
    676         if (bucket->displayBucket_ != NULL) {
    677             bucket = bucket->displayBucket_;
    678         }
    679         if (bucket->records_ == NULL) {
    680             bucket->records_ = new UVector(errorCode);
    681             if (bucket->records_ == NULL) {
    682                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    683                 return;
    684             }
    685         }
    686         bucket->records_->addElement(r, errorCode);
    687     }
    688 }
    689 
    690 void AlphabeticIndex::clearBuckets() {
    691     if (buckets_ != NULL) {
    692         delete buckets_;
    693         buckets_ = NULL;
    694         internalResetBucketIterator();
    695     }
    696 }
    697 
    698 void AlphabeticIndex::internalResetBucketIterator() {
    699     labelsIterIndex_ = -1;
    700     currentBucket_ = NULL;
    701 }
    702 
    703 
    704 void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
    705     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
    706     if (U_FAILURE(status)) {
    707         return;
    708     }
    709 
    710     UnicodeSet exemplars;
    711     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
    712     if (U_SUCCESS(status)) {
    713         initialLabels_->addAll(exemplars);
    714         return;
    715     }
    716     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
    717 
    718     // The locale data did not include explicit Index characters.
    719     // Synthesize a set of them from the locale's standard exemplar characters.
    720     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
    721     if (U_FAILURE(status)) {
    722         return;
    723     }
    724 
    725     // question: should we add auxiliary exemplars?
    726     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
    727         exemplars.add(0x61, 0x7A);
    728     }
    729     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
    730         // cut down to small list
    731         exemplars.remove(0xAC00, 0xD7A3).
    732             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
    733             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
    734             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
    735             add(0xD30C).add(0xD558);
    736     }
    737     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
    738         // cut down to small list
    739         // make use of the fact that Ethiopic is allocated in 8's, where
    740         // the base is 0 mod 8.
    741         UnicodeSet ethiopic(
    742             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
    743         UnicodeSetIterator it(ethiopic);
    744         while (it.next() && !it.isString()) {
    745             if ((it.getCodepoint() & 0x7) != 0) {
    746                 exemplars.remove(it.getCodepoint());
    747             }
    748         }
    749     }
    750 
    751     // Upper-case any that aren't already so.
    752     //   (We only do this for synthesized index characters.)
    753     UnicodeSetIterator it(exemplars);
    754     UnicodeString upperC;
    755     while (it.next()) {
    756         const UnicodeString &exemplarC = it.getString();
    757         upperC = exemplarC;
    758         upperC.toUpper(locale);
    759         initialLabels_->add(upperC);
    760     }
    761 }
    762 
    763 UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
    764     UnicodeSet contractions;
    765     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
    766     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
    767     initialLabels_->addAll(contractions);
    768     UnicodeSetIterator iter(contractions);
    769     while (iter.next()) {
    770         const UnicodeString &s = iter.getString();
    771         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
    772         UChar c = s.charAt(s.length() - 1);
    773         if (0x41 <= c && c <= 0x5A) {  // A-Z
    774             // There are Pinyin labels, add ASCII A-Z labels as well.
    775             initialLabels_->add(0x41, 0x5A);  // A-Z
    776             break;
    777         }
    778     }
    779     return TRUE;
    780 }
    781 
    782 
    783 /*
    784  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
    785  */
    786 static const UChar CGJ = 0x034F;
    787 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
    788     UnicodeString result;
    789     if (item.length() == 0) {
    790         return result;
    791     }
    792     int32_t i = 0;
    793     for (;;) {
    794         UChar32  cp = item.char32At(i);
    795         result.append(cp);
    796         i = item.moveIndex32(i, 1);
    797         if (i >= item.length()) {
    798             break;
    799         }
    800         result.append(CGJ);
    801     }
    802     return result;
    803 }
    804 
    805 
    806 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
    807     return FALSE;
    808 }
    809 
    810 
    811 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
    812     return FALSE;
    813 }
    814 
    815 
    816 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
    817     return *collator_;
    818 }
    819 
    820 
    821 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
    822     return inflowLabel_;
    823 }
    824 
    825 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
    826     return overflowLabel_;
    827 }
    828 
    829 
    830 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
    831     return underflowLabel_;
    832 }
    833 
    834 
    835 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    836     inflowLabel_ = label;
    837     clearBuckets();
    838     return *this;
    839 }
    840 
    841 
    842 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    843     overflowLabel_ = label;
    844     clearBuckets();
    845     return *this;
    846 }
    847 
    848 
    849 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
    850     underflowLabel_ = label;
    851     clearBuckets();
    852     return *this;
    853 }
    854 
    855 
    856 int32_t AlphabeticIndex::getMaxLabelCount() const {
    857     return maxLabelCount_;
    858 }
    859 
    860 
    861 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
    862     if (U_FAILURE(status)) {
    863         return *this;
    864     }
    865     if (maxLabelCount <= 0) {
    866         status = U_ILLEGAL_ARGUMENT_ERROR;
    867         return *this;
    868     }
    869     maxLabelCount_ = maxLabelCount;
    870     clearBuckets();
    871     return *this;
    872 }
    873 
    874 
    875 //
    876 //  init() - Common code for constructors.
    877 //
    878 
    879 void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
    880     if (U_FAILURE(status)) { return; }
    881     if (locale == NULL && collator_ == NULL) {
    882         status = U_ILLEGAL_ARGUMENT_ERROR;
    883         return;
    884     }
    885 
    886     initialLabels_         = new UnicodeSet();
    887     if (initialLabels_ == NULL) {
    888         status = U_MEMORY_ALLOCATION_ERROR;
    889         return;
    890     }
    891 
    892     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
    893     overflowLabel_ = inflowLabel_;
    894     underflowLabel_ = inflowLabel_;
    895 
    896     if (collator_ == NULL) {
    897         Collator *coll = Collator::createInstance(*locale, status);
    898         if (U_FAILURE(status)) {
    899             delete coll;
    900             return;
    901         }
    902         if (coll == NULL) {
    903             status = U_MEMORY_ALLOCATION_ERROR;
    904             return;
    905         }
    906         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
    907         if (collator_ == NULL) {
    908             delete coll;
    909             status = U_UNSUPPORTED_ERROR;
    910             return;
    911         }
    912     }
    913     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
    914     if (collatorPrimaryOnly_ == NULL) {
    915         status = U_MEMORY_ALLOCATION_ERROR;
    916         return;
    917     }
    918     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
    919     firstCharsInScripts_ = firstStringsInScript(status);
    920     if (U_FAILURE(status)) { return; }
    921     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
    922     // Guard against a degenerate collator where
    923     // some script boundary strings are primary ignorable.
    924     for (;;) {
    925         if (U_FAILURE(status)) { return; }
    926         if (firstCharsInScripts_->isEmpty()) {
    927             // AlphabeticIndex requires some non-ignorable script boundary strings.
    928             status = U_ILLEGAL_ARGUMENT_ERROR;
    929             return;
    930         }
    931         if (collatorPrimaryOnly_->compare(
    932                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
    933                 emptyString_, status) == UCOL_EQUAL) {
    934             firstCharsInScripts_->removeElementAt(0);
    935         } else {
    936             break;
    937         }
    938     }
    939 
    940     // Chinese index characters, which are specific to each of the several Chinese tailorings,
    941     // take precedence over the single locale data exemplar set per language.
    942     if (!addChineseIndexCharacters(status) && locale != NULL) {
    943         addIndexExemplars(*locale, status);
    944     }
    945 }
    946 
    947 
    948 //
    949 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
    950 //
    951 static int32_t U_CALLCONV
    952 collatorComparator(const void *context, const void *left, const void *right) {
    953     const UElement *leftElement = static_cast<const UElement *>(left);
    954     const UElement *rightElement = static_cast<const UElement *>(right);
    955     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
    956     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
    957 
    958     if (leftString == rightString) {
    959         // Catches case where both are NULL
    960         return 0;
    961     }
    962     if (leftString == NULL) {
    963         return 1;
    964     };
    965     if (rightString == NULL) {
    966         return -1;
    967     }
    968     const Collator *col = static_cast<const Collator *>(context);
    969     UErrorCode errorCode = U_ZERO_ERROR;
    970     return col->compare(*leftString, *rightString, errorCode);
    971 }
    972 
    973 //
    974 //  Comparison function for UVector<Record *> sorting with a collator.
    975 //
    976 static int32_t U_CALLCONV
    977 recordCompareFn(const void *context, const void *left, const void *right) {
    978     const UElement *leftElement = static_cast<const UElement *>(left);
    979     const UElement *rightElement = static_cast<const UElement *>(right);
    980     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
    981     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
    982     const Collator *col = static_cast<const Collator *>(context);
    983     UErrorCode errorCode = U_ZERO_ERROR;
    984     return col->compare(leftRec->name_, rightRec->name_, errorCode);
    985 }
    986 
    987 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    988     if (U_FAILURE(status)) {
    989         return NULL;
    990     }
    991     LocalPointer<UVector> dest(new UVector(status), status);
    992     if (U_FAILURE(status)) {
    993         return NULL;
    994     }
    995     dest->setDeleter(uprv_deleteUObject);
    996     // Fetch the script-first-primary contractions which are defined in the root collator.
    997     // They all start with U+FDD1.
    998     UnicodeSet set;
    999     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
   1000     if (U_FAILURE(status)) {
   1001         return NULL;
   1002     }
   1003     if (set.isEmpty()) {
   1004         status = U_UNSUPPORTED_ERROR;
   1005         return NULL;
   1006     }
   1007     UnicodeSetIterator iter(set);
   1008     while (iter.next()) {
   1009         const UnicodeString &boundary = iter.getString();
   1010         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
   1011         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
   1012             // Ignore boundaries for the special reordering groups.
   1013             // Take only those for "real scripts" (where the sample character is a Letter,
   1014             // and the one for unassigned implicit weights (Cn).
   1015             continue;
   1016         }
   1017         UnicodeString *s = new UnicodeString(boundary);
   1018         if (s == NULL) {
   1019             status = U_MEMORY_ALLOCATION_ERROR;
   1020             return NULL;
   1021         }
   1022         dest->addElement(s, status);
   1023     }
   1024     return dest.orphan();
   1025 }
   1026 
   1027 
   1028 namespace {
   1029 
   1030 /**
   1031  * Returns true if one index character string is "better" than the other.
   1032  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
   1033  * better, and otherwise binary-less-than is better.
   1034  */
   1035 UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
   1036                                 const UnicodeString &one, const UnicodeString &other) {
   1037     // This is called with primary-equal strings, but never with one.equals(other).
   1038     UErrorCode status = U_ZERO_ERROR;
   1039     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
   1040     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
   1041     if (U_FAILURE(status)) { return FALSE; }
   1042     int32_t result = n1.countChar32() - n2.countChar32();
   1043     if (result != 0) {
   1044         return result < 0;
   1045     }
   1046     result = n1.compareCodePointOrder(n2);
   1047     if (result != 0) {
   1048         return result < 0;
   1049     }
   1050     return one.compareCodePointOrder(other) < 0;
   1051 }
   1052 
   1053 }  // namespace
   1054 
   1055 //
   1056 //  Constructor & Destructor for AlphabeticIndex::Record
   1057 //
   1058 //     Records are internal only, instances are not directly surfaced in the public API.
   1059 //     This class is mostly struct-like, with all public fields.
   1060 
   1061 AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
   1062         : name_(name), data_(data) {}
   1063 
   1064 AlphabeticIndex::Record::~Record() {
   1065 }
   1066 
   1067 
   1068 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
   1069     if (U_FAILURE(status)) {
   1070         return *this;
   1071     }
   1072     if (inputList_ == NULL) {
   1073         inputList_ = new UVector(status);
   1074         if (inputList_ == NULL) {
   1075             status = U_MEMORY_ALLOCATION_ERROR;
   1076             return *this;
   1077         }
   1078         inputList_->setDeleter(alphaIndex_deleteRecord);
   1079     }
   1080     Record *r = new Record(name, data);
   1081     if (r == NULL) {
   1082         status = U_MEMORY_ALLOCATION_ERROR;
   1083         return *this;
   1084     }
   1085     inputList_->addElement(r, status);
   1086     clearBuckets();
   1087     //std::string ss;
   1088     //std::string ss2;
   1089     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
   1090     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
   1091     return *this;
   1092 }
   1093 
   1094 
   1095 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
   1096     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
   1097         inputList_->removeAllElements();
   1098         clearBuckets();
   1099     }
   1100     return *this;
   1101 }
   1102 
   1103 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
   1104     initBuckets(status);
   1105     if (U_FAILURE(status)) {
   1106         return 0;
   1107     }
   1108     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
   1109 }
   1110 
   1111 
   1112 int32_t AlphabeticIndex::getBucketIndex() const {
   1113     return labelsIterIndex_;
   1114 }
   1115 
   1116 
   1117 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
   1118     if (U_FAILURE(status)) {
   1119         return FALSE;
   1120     }
   1121     if (buckets_ == NULL && currentBucket_ != NULL) {
   1122         status = U_ENUM_OUT_OF_SYNC_ERROR;
   1123         return FALSE;
   1124     }
   1125     initBuckets(status);
   1126     if (U_FAILURE(status)) {
   1127         return FALSE;
   1128     }
   1129     ++labelsIterIndex_;
   1130     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
   1131         labelsIterIndex_ = buckets_->getBucketCount();
   1132         return FALSE;
   1133     }
   1134     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
   1135     resetRecordIterator();
   1136     return TRUE;
   1137 }
   1138 
   1139 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
   1140     if (currentBucket_ != NULL) {
   1141         return currentBucket_->label_;
   1142     } else {
   1143         return emptyString_;
   1144     }
   1145 }
   1146 
   1147 
   1148 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
   1149     if (currentBucket_ != NULL) {
   1150         return currentBucket_->labelType_;
   1151     } else {
   1152         return U_ALPHAINDEX_NORMAL;
   1153     }
   1154 }
   1155 
   1156 
   1157 int32_t AlphabeticIndex::getBucketRecordCount() const {
   1158     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
   1159         return currentBucket_->records_->size();
   1160     } else {
   1161         return 0;
   1162     }
   1163 }
   1164 
   1165 
   1166 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
   1167     if (U_FAILURE(status)) {
   1168         return *this;
   1169     }
   1170     internalResetBucketIterator();
   1171     return *this;
   1172 }
   1173 
   1174 
   1175 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
   1176     if (U_FAILURE(status)) {
   1177         return FALSE;
   1178     }
   1179     if (currentBucket_ == NULL) {
   1180         // We are trying to iterate over the items in a bucket, but there is no
   1181         // current bucket from the enumeration of buckets.
   1182         status = U_INVALID_STATE_ERROR;
   1183         return FALSE;
   1184     }
   1185     if (buckets_ == NULL) {
   1186         status = U_ENUM_OUT_OF_SYNC_ERROR;
   1187         return FALSE;
   1188     }
   1189     if (currentBucket_->records_ == NULL) {
   1190         return FALSE;
   1191     }
   1192     ++itemsIterIndex_;
   1193     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
   1194         itemsIterIndex_  = currentBucket_->records_->size();
   1195         return FALSE;
   1196     }
   1197     return TRUE;
   1198 }
   1199 
   1200 
   1201 const UnicodeString &AlphabeticIndex::getRecordName() const {
   1202     const UnicodeString *retStr = &emptyString_;
   1203     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
   1204         itemsIterIndex_ >= 0 &&
   1205         itemsIterIndex_ < currentBucket_->records_->size()) {
   1206             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
   1207             retStr = &item->name_;
   1208     }
   1209     return *retStr;
   1210 }
   1211 
   1212 const void *AlphabeticIndex::getRecordData() const {
   1213     const void *retPtr = NULL;
   1214     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
   1215         itemsIterIndex_ >= 0 &&
   1216         itemsIterIndex_ < currentBucket_->records_->size()) {
   1217             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
   1218             retPtr = item->data_;
   1219     }
   1220     return retPtr;
   1221 }
   1222 
   1223 
   1224 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
   1225     itemsIterIndex_ = -1;
   1226     return *this;
   1227 }
   1228 
   1229 
   1230 
   1231 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
   1232                                 const UnicodeString &lowerBoundary,
   1233                                 UAlphabeticIndexLabelType type)
   1234         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
   1235           displayBucket_(NULL), displayIndex_(-1),
   1236           records_(NULL) {
   1237 }
   1238 
   1239 
   1240 AlphabeticIndex::Bucket::~Bucket() {
   1241     delete records_;
   1242 }
   1243 
   1244 U_NAMESPACE_END
   1245 
   1246 #endif  // !UCONFIG_NO_COLLATION
   1247