Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2010, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 
      8 /*
      9 * File coleitr.cpp
     10 *
     11 *
     12 *
     13 * Created by: Helena Shih
     14 *
     15 * Modification History:
     16 *
     17 *  Date      Name        Description
     18 *
     19 *  6/23/97   helena      Adding comments to make code more readable.
     20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
     21 * 12/10/99   aliu        Ported Thai collation support from Java.
     22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
     23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is
     24 *                        private constructor and no calls are made to it
     25 */
     26 
     27 #include "unicode/utypes.h"
     28 
     29 #if !UCONFIG_NO_COLLATION
     30 
     31 #include "unicode/coleitr.h"
     32 #include "unicode/ustring.h"
     33 #include "ucol_imp.h"
     34 #include "cmemory.h"
     35 
     36 
     37 /* Constants --------------------------------------------------------------- */
     38 
     39 U_NAMESPACE_BEGIN
     40 
     41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
     42 
     43 /* CollationElementIterator public constructor/destructor ------------------ */
     44 
     45 CollationElementIterator::CollationElementIterator(
     46                                          const CollationElementIterator& other)
     47                                          : UObject(other), isDataOwned_(TRUE)
     48 {
     49     UErrorCode status = U_ZERO_ERROR;
     50     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
     51                                 &status);
     52 
     53     *this = other;
     54 }
     55 
     56 CollationElementIterator::~CollationElementIterator()
     57 {
     58     if (isDataOwned_) {
     59         ucol_closeElements(m_data_);
     60     }
     61 }
     62 
     63 /* CollationElementIterator public methods --------------------------------- */
     64 
     65 int32_t CollationElementIterator::getOffset() const
     66 {
     67     return ucol_getOffset(m_data_);
     68 }
     69 
     70 /**
     71 * Get the ordering priority of the next character in the string.
     72 * @return the next character's ordering. Returns NULLORDER if an error has
     73 *         occured or if the end of string has been reached
     74 */
     75 int32_t CollationElementIterator::next(UErrorCode& status)
     76 {
     77     return ucol_next(m_data_, &status);
     78 }
     79 
     80 UBool CollationElementIterator::operator!=(
     81                                   const CollationElementIterator& other) const
     82 {
     83     return !(*this == other);
     84 }
     85 
     86 UBool CollationElementIterator::operator==(
     87                                     const CollationElementIterator& that) const
     88 {
     89     if (this == &that || m_data_ == that.m_data_) {
     90         return TRUE;
     91     }
     92 
     93     // option comparison
     94     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
     95     {
     96         return FALSE;
     97     }
     98 
     99     // the constructor and setText always sets a length
    100     // and we only compare the string not the contents of the normalization
    101     // buffer
    102     int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
    103     int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
    104 
    105     if (thislength != thatlength) {
    106         return FALSE;
    107     }
    108 
    109     if (uprv_memcmp(m_data_->iteratordata_.string,
    110                     that.m_data_->iteratordata_.string,
    111                     thislength * U_SIZEOF_UCHAR) != 0) {
    112         return FALSE;
    113     }
    114     if (getOffset() != that.getOffset()) {
    115         return FALSE;
    116     }
    117 
    118     // checking normalization buffer
    119     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
    120         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
    121             return FALSE;
    122         }
    123         // both are in the normalization buffer
    124         if (m_data_->iteratordata_.pos
    125             - m_data_->iteratordata_.writableBuffer.getBuffer()
    126             != that.m_data_->iteratordata_.pos
    127             - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
    128             // not in the same position in the normalization buffer
    129             return FALSE;
    130         }
    131     }
    132     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
    133         return FALSE;
    134     }
    135     // checking ce position
    136     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
    137             == (that.m_data_->iteratordata_.CEpos
    138                                         - that.m_data_->iteratordata_.CEs);
    139 }
    140 
    141 /**
    142 * Get the ordering priority of the previous collation element in the string.
    143 * @param status the error code status.
    144 * @return the previous element's ordering. Returns NULLORDER if an error has
    145 *         occured or if the start of string has been reached.
    146 */
    147 int32_t CollationElementIterator::previous(UErrorCode& status)
    148 {
    149     return ucol_previous(m_data_, &status);
    150 }
    151 
    152 /**
    153 * Resets the cursor to the beginning of the string.
    154 */
    155 void CollationElementIterator::reset()
    156 {
    157     ucol_reset(m_data_);
    158 }
    159 
    160 void CollationElementIterator::setOffset(int32_t newOffset,
    161                                          UErrorCode& status)
    162 {
    163     ucol_setOffset(m_data_, newOffset, &status);
    164 }
    165 
    166 /**
    167 * Sets the source to the new source string.
    168 */
    169 void CollationElementIterator::setText(const UnicodeString& source,
    170                                        UErrorCode& status)
    171 {
    172     if (U_FAILURE(status)) {
    173         return;
    174     }
    175 
    176     int32_t length = source.length();
    177     UChar *string = NULL;
    178     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    179         uprv_free((UChar *)m_data_->iteratordata_.string);
    180     }
    181     m_data_->isWritable = TRUE;
    182     if (length > 0) {
    183         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    184         /* test for NULL */
    185         if (string == NULL) {
    186             status = U_MEMORY_ALLOCATION_ERROR;
    187             return;
    188         }
    189         u_memcpy(string, source.getBuffer(), length);
    190     }
    191     else {
    192         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    193         /* test for NULL */
    194         if (string == NULL) {
    195             status = U_MEMORY_ALLOCATION_ERROR;
    196             return;
    197         }
    198         *string = 0;
    199     }
    200     /* Free offsetBuffer before initializing it. */
    201     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    202     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
    203         &m_data_->iteratordata_, &status);
    204 
    205     m_data_->reset_   = TRUE;
    206 }
    207 
    208 // Sets the source to the new character iterator.
    209 void CollationElementIterator::setText(CharacterIterator& source,
    210                                        UErrorCode& status)
    211 {
    212     if (U_FAILURE(status))
    213         return;
    214 
    215     int32_t length = source.getLength();
    216     UChar *buffer = NULL;
    217 
    218     if (length == 0) {
    219         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    220         /* test for NULL */
    221         if (buffer == NULL) {
    222             status = U_MEMORY_ALLOCATION_ERROR;
    223             return;
    224         }
    225         *buffer = 0;
    226     }
    227     else {
    228         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    229         /* test for NULL */
    230         if (buffer == NULL) {
    231             status = U_MEMORY_ALLOCATION_ERROR;
    232             return;
    233         }
    234         /*
    235         Using this constructor will prevent buffer from being removed when
    236         string gets removed
    237         */
    238         UnicodeString string;
    239         source.getText(string);
    240         u_memcpy(buffer, string.getBuffer(), length);
    241     }
    242 
    243     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    244         uprv_free((UChar *)m_data_->iteratordata_.string);
    245     }
    246     m_data_->isWritable = TRUE;
    247     /* Free offsetBuffer before initializing it. */
    248     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    249     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
    250         &m_data_->iteratordata_, &status);
    251     m_data_->reset_   = TRUE;
    252 }
    253 
    254 int32_t CollationElementIterator::strengthOrder(int32_t order) const
    255 {
    256     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
    257     // Mask off the unwanted differences.
    258     if (s == UCOL_PRIMARY) {
    259         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
    260     }
    261     else if (s == UCOL_SECONDARY) {
    262         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
    263     }
    264 
    265     return order;
    266 }
    267 
    268 /* CollationElementIterator private constructors/destructors --------------- */
    269 
    270 /**
    271 * This is the "real" constructor for this class; it constructs an iterator
    272 * over the source text using the specified collator
    273 */
    274 CollationElementIterator::CollationElementIterator(
    275                                                const UnicodeString& sourceText,
    276                                                const RuleBasedCollator* order,
    277                                                UErrorCode& status)
    278                                                : isDataOwned_(TRUE)
    279 {
    280     if (U_FAILURE(status)) {
    281         return;
    282     }
    283 
    284     int32_t length = sourceText.length();
    285     UChar *string = NULL;
    286 
    287     if (length > 0) {
    288         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    289         /* test for NULL */
    290         if (string == NULL) {
    291             status = U_MEMORY_ALLOCATION_ERROR;
    292             return;
    293         }
    294         /*
    295         Using this constructor will prevent buffer from being removed when
    296         string gets removed
    297         */
    298         u_memcpy(string, sourceText.getBuffer(), length);
    299     }
    300     else {
    301         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    302         /* test for NULL */
    303         if (string == NULL) {
    304             status = U_MEMORY_ALLOCATION_ERROR;
    305             return;
    306         }
    307         *string = 0;
    308     }
    309     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
    310 
    311     /* Test for buffer overflows */
    312     if (U_FAILURE(status)) {
    313         return;
    314     }
    315     m_data_->isWritable = TRUE;
    316 }
    317 
    318 /**
    319 * This is the "real" constructor for this class; it constructs an iterator over
    320 * the source text using the specified collator
    321 */
    322 CollationElementIterator::CollationElementIterator(
    323                                            const CharacterIterator& sourceText,
    324                                            const RuleBasedCollator* order,
    325                                            UErrorCode& status)
    326                                            : isDataOwned_(TRUE)
    327 {
    328     if (U_FAILURE(status))
    329         return;
    330 
    331     // **** should I just drop this test? ****
    332     /*
    333     if ( sourceText.endIndex() != 0 )
    334     {
    335         // A CollationElementIterator is really a two-layered beast.
    336         // Internally it uses a Normalizer to munge the source text into a form
    337         // where all "composed" Unicode characters (such as \u00FC) are split into a
    338         // normal character and a combining accent character.
    339         // Afterward, CollationElementIterator does its own processing to handle
    340         // expanding and contracting collation sequences, ignorables, and so on.
    341 
    342         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
    343                                 ? Normalizer::NO_OP : order->getDecomposition();
    344 
    345         text = new Normalizer(sourceText, decomp);
    346         if (text == NULL)
    347         status = U_MEMORY_ALLOCATION_ERROR;
    348     }
    349     */
    350     int32_t length = sourceText.getLength();
    351     UChar *buffer;
    352     if (length > 0) {
    353         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    354         /* test for NULL */
    355         if (buffer == NULL) {
    356             status = U_MEMORY_ALLOCATION_ERROR;
    357             return;
    358         }
    359         /*
    360         Using this constructor will prevent buffer from being removed when
    361         string gets removed
    362         */
    363         UnicodeString string(buffer, length, length);
    364         ((CharacterIterator &)sourceText).getText(string);
    365         const UChar *temp = string.getBuffer();
    366         u_memcpy(buffer, temp, length);
    367     }
    368     else {
    369         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    370         /* test for NULL */
    371         if (buffer == NULL) {
    372             status = U_MEMORY_ALLOCATION_ERROR;
    373             return;
    374         }
    375         *buffer = 0;
    376     }
    377     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
    378 
    379     /* Test for buffer overflows */
    380     if (U_FAILURE(status)) {
    381         return;
    382     }
    383     m_data_->isWritable = TRUE;
    384 }
    385 
    386 /* CollationElementIterator protected methods ----------------------------- */
    387 
    388 const CollationElementIterator& CollationElementIterator::operator=(
    389                                          const CollationElementIterator& other)
    390 {
    391     if (this != &other)
    392     {
    393         UCollationElements *ucolelem      = this->m_data_;
    394         UCollationElements *otherucolelem = other.m_data_;
    395         collIterate        *coliter       = &(ucolelem->iteratordata_);
    396         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
    397         int                length         = 0;
    398 
    399         // checking only UCOL_ITER_HASLEN is not enough here as we may be in
    400         // the normalization buffer
    401         length = (int)(othercoliter->endp - othercoliter->string);
    402 
    403         ucolelem->reset_         = otherucolelem->reset_;
    404         ucolelem->isWritable     = TRUE;
    405 
    406         /* create a duplicate of string */
    407         if (length > 0) {
    408             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
    409             if(coliter->string != NULL) {
    410                 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
    411                     length * U_SIZEOF_UCHAR);
    412             } else { // Error: couldn't allocate memory. No copying should be done
    413                 length = 0;
    414             }
    415         }
    416         else {
    417             coliter->string = NULL;
    418         }
    419 
    420         /* start and end of string */
    421         coliter->endp = coliter->string + length;
    422 
    423         /* handle writable buffer here */
    424 
    425         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
    426             coliter->writableBuffer = othercoliter->writableBuffer;
    427             coliter->writableBuffer.getTerminatedBuffer();
    428         }
    429 
    430         /* current position */
    431         if (othercoliter->pos >= othercoliter->string &&
    432             othercoliter->pos <= othercoliter->endp)
    433         {
    434             coliter->pos = coliter->string +
    435                 (othercoliter->pos - othercoliter->string);
    436         }
    437         else {
    438             coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
    439                 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
    440         }
    441 
    442         /* CE buffer */
    443         int32_t CEsize;
    444         if (coliter->extendCEs) {
    445             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
    446             CEsize = sizeof(othercoliter->extendCEs);
    447             if (CEsize > 0) {
    448                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
    449                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
    450             }
    451             coliter->toReturn = coliter->extendCEs +
    452                 (othercoliter->toReturn - othercoliter->extendCEs);
    453             coliter->CEpos    = coliter->extendCEs + CEsize;
    454         } else {
    455             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
    456             if (CEsize > 0) {
    457                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
    458             }
    459             coliter->toReturn = coliter->CEs +
    460                 (othercoliter->toReturn - othercoliter->CEs);
    461             coliter->CEpos    = coliter->CEs + CEsize;
    462         }
    463 
    464         if (othercoliter->fcdPosition != NULL) {
    465             coliter->fcdPosition = coliter->string +
    466                 (othercoliter->fcdPosition
    467                 - othercoliter->string);
    468         }
    469         else {
    470             coliter->fcdPosition = NULL;
    471         }
    472         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
    473         coliter->origFlags   = othercoliter->origFlags;
    474         coliter->coll = othercoliter->coll;
    475         this->isDataOwned_ = TRUE;
    476     }
    477 
    478     return *this;
    479 }
    480 
    481 U_NAMESPACE_END
    482 
    483 #endif /* #if !UCONFIG_NO_COLLATION */
    484 
    485 /* eof */
    486