Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2011, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 
      8 /*
      9 * File coleitr.cpp
     10 *
     11 *
     12 *
     13 * Created by: Helena Shih
     14 *
     15 * Modification History:
     16 *
     17 *  Date      Name        Description
     18 *
     19 *  6/23/97   helena      Adding comments to make code more readable.
     20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
     21 * 12/10/99   aliu        Ported Thai collation support from Java.
     22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
     23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is
     24 *                        private constructor and no calls are made to it
     25 */
     26 
     27 #include "unicode/utypes.h"
     28 
     29 #if !UCONFIG_NO_COLLATION
     30 
     31 #include "unicode/coleitr.h"
     32 #include "unicode/ustring.h"
     33 #include "ucol_imp.h"
     34 #include "uassert.h"
     35 #include "cmemory.h"
     36 
     37 
     38 /* Constants --------------------------------------------------------------- */
     39 
     40 U_NAMESPACE_BEGIN
     41 
     42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
     43 
     44 /* CollationElementIterator public constructor/destructor ------------------ */
     45 
     46 CollationElementIterator::CollationElementIterator(
     47                                          const CollationElementIterator& other)
     48                                          : UObject(other), isDataOwned_(TRUE)
     49 {
     50     UErrorCode status = U_ZERO_ERROR;
     51     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
     52                                 &status);
     53 
     54     *this = other;
     55 }
     56 
     57 CollationElementIterator::~CollationElementIterator()
     58 {
     59     if (isDataOwned_) {
     60         ucol_closeElements(m_data_);
     61     }
     62 }
     63 
     64 /* CollationElementIterator public methods --------------------------------- */
     65 
     66 int32_t CollationElementIterator::getOffset() const
     67 {
     68     return ucol_getOffset(m_data_);
     69 }
     70 
     71 /**
     72 * Get the ordering priority of the next character in the string.
     73 * @return the next character's ordering. Returns NULLORDER if an error has
     74 *         occured or if the end of string has been reached
     75 */
     76 int32_t CollationElementIterator::next(UErrorCode& status)
     77 {
     78     return ucol_next(m_data_, &status);
     79 }
     80 
     81 UBool CollationElementIterator::operator!=(
     82                                   const CollationElementIterator& other) const
     83 {
     84     return !(*this == other);
     85 }
     86 
     87 UBool CollationElementIterator::operator==(
     88                                     const CollationElementIterator& that) const
     89 {
     90     if (this == &that || m_data_ == that.m_data_) {
     91         return TRUE;
     92     }
     93 
     94     // option comparison
     95     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
     96     {
     97         return FALSE;
     98     }
     99 
    100     // the constructor and setText always sets a length
    101     // and we only compare the string not the contents of the normalization
    102     // buffer
    103     int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
    104     int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
    105 
    106     if (thislength != thatlength) {
    107         return FALSE;
    108     }
    109 
    110     if (uprv_memcmp(m_data_->iteratordata_.string,
    111                     that.m_data_->iteratordata_.string,
    112                     thislength * U_SIZEOF_UCHAR) != 0) {
    113         return FALSE;
    114     }
    115     if (getOffset() != that.getOffset()) {
    116         return FALSE;
    117     }
    118 
    119     // checking normalization buffer
    120     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
    121         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
    122             return FALSE;
    123         }
    124         // both are in the normalization buffer
    125         if (m_data_->iteratordata_.pos
    126             - m_data_->iteratordata_.writableBuffer.getBuffer()
    127             != that.m_data_->iteratordata_.pos
    128             - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
    129             // not in the same position in the normalization buffer
    130             return FALSE;
    131         }
    132     }
    133     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
    134         return FALSE;
    135     }
    136     // checking ce position
    137     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
    138             == (that.m_data_->iteratordata_.CEpos
    139                                         - that.m_data_->iteratordata_.CEs);
    140 }
    141 
    142 /**
    143 * Get the ordering priority of the previous collation element in the string.
    144 * @param status the error code status.
    145 * @return the previous element's ordering. Returns NULLORDER if an error has
    146 *         occured or if the start of string has been reached.
    147 */
    148 int32_t CollationElementIterator::previous(UErrorCode& status)
    149 {
    150     return ucol_previous(m_data_, &status);
    151 }
    152 
    153 /**
    154 * Resets the cursor to the beginning of the string.
    155 */
    156 void CollationElementIterator::reset()
    157 {
    158     ucol_reset(m_data_);
    159 }
    160 
    161 void CollationElementIterator::setOffset(int32_t newOffset,
    162                                          UErrorCode& status)
    163 {
    164     ucol_setOffset(m_data_, newOffset, &status);
    165 }
    166 
    167 /**
    168 * Sets the source to the new source string.
    169 */
    170 void CollationElementIterator::setText(const UnicodeString& source,
    171                                        UErrorCode& status)
    172 {
    173     if (U_FAILURE(status)) {
    174         return;
    175     }
    176 
    177     int32_t length = source.length();
    178     UChar *string = NULL;
    179     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    180         uprv_free((UChar *)m_data_->iteratordata_.string);
    181     }
    182     m_data_->isWritable = TRUE;
    183     if (length > 0) {
    184         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    185         /* test for NULL */
    186         if (string == NULL) {
    187             status = U_MEMORY_ALLOCATION_ERROR;
    188             return;
    189         }
    190         u_memcpy(string, source.getBuffer(), length);
    191     }
    192     else {
    193         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    194         /* test for NULL */
    195         if (string == NULL) {
    196             status = U_MEMORY_ALLOCATION_ERROR;
    197             return;
    198         }
    199         *string = 0;
    200     }
    201     /* Free offsetBuffer before initializing it. */
    202     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    203     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
    204         &m_data_->iteratordata_, &status);
    205 
    206     m_data_->reset_   = TRUE;
    207 }
    208 
    209 // Sets the source to the new character iterator.
    210 void CollationElementIterator::setText(CharacterIterator& source,
    211                                        UErrorCode& status)
    212 {
    213     if (U_FAILURE(status))
    214         return;
    215 
    216     int32_t length = source.getLength();
    217     UChar *buffer = NULL;
    218 
    219     if (length == 0) {
    220         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    221         /* test for NULL */
    222         if (buffer == NULL) {
    223             status = U_MEMORY_ALLOCATION_ERROR;
    224             return;
    225         }
    226         *buffer = 0;
    227     }
    228     else {
    229         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    230         /* test for NULL */
    231         if (buffer == NULL) {
    232             status = U_MEMORY_ALLOCATION_ERROR;
    233             return;
    234         }
    235         /*
    236         Using this constructor will prevent buffer from being removed when
    237         string gets removed
    238         */
    239         UnicodeString string;
    240         source.getText(string);
    241         u_memcpy(buffer, string.getBuffer(), length);
    242     }
    243 
    244     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    245         uprv_free((UChar *)m_data_->iteratordata_.string);
    246     }
    247     m_data_->isWritable = TRUE;
    248     /* Free offsetBuffer before initializing it. */
    249     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    250     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
    251         &m_data_->iteratordata_, &status);
    252     m_data_->reset_   = TRUE;
    253 }
    254 
    255 int32_t CollationElementIterator::strengthOrder(int32_t order) const
    256 {
    257     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
    258     // Mask off the unwanted differences.
    259     if (s == UCOL_PRIMARY) {
    260         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
    261     }
    262     else if (s == UCOL_SECONDARY) {
    263         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
    264     }
    265 
    266     return order;
    267 }
    268 
    269 /* CollationElementIterator private constructors/destructors --------------- */
    270 
    271 /**
    272 * This is the "real" constructor for this class; it constructs an iterator
    273 * over the source text using the specified collator
    274 */
    275 CollationElementIterator::CollationElementIterator(
    276                                                const UnicodeString& sourceText,
    277                                                const RuleBasedCollator* order,
    278                                                UErrorCode& status)
    279                                                : isDataOwned_(TRUE)
    280 {
    281     if (U_FAILURE(status)) {
    282         return;
    283     }
    284 
    285     int32_t length = sourceText.length();
    286     UChar *string = NULL;
    287 
    288     if (length > 0) {
    289         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    290         /* test for NULL */
    291         if (string == NULL) {
    292             status = U_MEMORY_ALLOCATION_ERROR;
    293             return;
    294         }
    295         /*
    296         Using this constructor will prevent buffer from being removed when
    297         string gets removed
    298         */
    299         u_memcpy(string, sourceText.getBuffer(), length);
    300     }
    301     else {
    302         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    303         /* test for NULL */
    304         if (string == NULL) {
    305             status = U_MEMORY_ALLOCATION_ERROR;
    306             return;
    307         }
    308         *string = 0;
    309     }
    310     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
    311 
    312     /* Test for buffer overflows */
    313     if (U_FAILURE(status)) {
    314         return;
    315     }
    316     m_data_->isWritable = TRUE;
    317 }
    318 
    319 /**
    320 * This is the "real" constructor for this class; it constructs an iterator over
    321 * the source text using the specified collator
    322 */
    323 CollationElementIterator::CollationElementIterator(
    324                                            const CharacterIterator& sourceText,
    325                                            const RuleBasedCollator* order,
    326                                            UErrorCode& status)
    327                                            : isDataOwned_(TRUE)
    328 {
    329     if (U_FAILURE(status))
    330         return;
    331 
    332     // **** should I just drop this test? ****
    333     /*
    334     if ( sourceText.endIndex() != 0 )
    335     {
    336         // A CollationElementIterator is really a two-layered beast.
    337         // Internally it uses a Normalizer to munge the source text into a form
    338         // where all "composed" Unicode characters (such as \u00FC) are split into a
    339         // normal character and a combining accent character.
    340         // Afterward, CollationElementIterator does its own processing to handle
    341         // expanding and contracting collation sequences, ignorables, and so on.
    342 
    343         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
    344                                 ? Normalizer::NO_OP : order->getDecomposition();
    345 
    346         text = new Normalizer(sourceText, decomp);
    347         if (text == NULL)
    348         status = U_MEMORY_ALLOCATION_ERROR;
    349     }
    350     */
    351     int32_t length = sourceText.getLength();
    352     UChar *buffer;
    353     if (length > 0) {
    354         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    355         /* test for NULL */
    356         if (buffer == NULL) {
    357             status = U_MEMORY_ALLOCATION_ERROR;
    358             return;
    359         }
    360         /*
    361         Using this constructor will prevent buffer from being removed when
    362         string gets removed
    363         */
    364         UnicodeString string(buffer, length, length);
    365         ((CharacterIterator &)sourceText).getText(string);
    366         const UChar *temp = string.getBuffer();
    367         u_memcpy(buffer, temp, length);
    368     }
    369     else {
    370         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    371         /* test for NULL */
    372         if (buffer == NULL) {
    373             status = U_MEMORY_ALLOCATION_ERROR;
    374             return;
    375         }
    376         *buffer = 0;
    377     }
    378     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
    379 
    380     /* Test for buffer overflows */
    381     if (U_FAILURE(status)) {
    382         return;
    383     }
    384     m_data_->isWritable = TRUE;
    385 }
    386 
    387 /* CollationElementIterator protected methods ----------------------------- */
    388 
    389 const CollationElementIterator& CollationElementIterator::operator=(
    390                                          const CollationElementIterator& other)
    391 {
    392     if (this != &other)
    393     {
    394         UCollationElements *ucolelem      = this->m_data_;
    395         UCollationElements *otherucolelem = other.m_data_;
    396         collIterate        *coliter       = &(ucolelem->iteratordata_);
    397         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
    398         int                length         = 0;
    399 
    400         // checking only UCOL_ITER_HASLEN is not enough here as we may be in
    401         // the normalization buffer
    402         length = (int)(othercoliter->endp - othercoliter->string);
    403 
    404         ucolelem->reset_         = otherucolelem->reset_;
    405         ucolelem->isWritable     = TRUE;
    406 
    407         /* create a duplicate of string */
    408         if (length > 0) {
    409             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
    410             if(coliter->string != NULL) {
    411                 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
    412                     length * U_SIZEOF_UCHAR);
    413             } else { // Error: couldn't allocate memory. No copying should be done
    414                 length = 0;
    415             }
    416         }
    417         else {
    418             coliter->string = NULL;
    419         }
    420 
    421         /* start and end of string */
    422         coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
    423 
    424         /* handle writable buffer here */
    425 
    426         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
    427             coliter->writableBuffer = othercoliter->writableBuffer;
    428             coliter->writableBuffer.getTerminatedBuffer();
    429         }
    430 
    431         /* current position */
    432         if (othercoliter->pos >= othercoliter->string &&
    433             othercoliter->pos <= othercoliter->endp)
    434         {
    435             U_ASSERT(coliter->string != NULL);
    436             coliter->pos = coliter->string +
    437                 (othercoliter->pos - othercoliter->string);
    438         }
    439         else {
    440             coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
    441                 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
    442         }
    443 
    444         /* CE buffer */
    445         int32_t CEsize;
    446         if (coliter->extendCEs) {
    447             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
    448             CEsize = sizeof(othercoliter->extendCEs);
    449             if (CEsize > 0) {
    450                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
    451                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
    452             }
    453             coliter->toReturn = coliter->extendCEs +
    454                 (othercoliter->toReturn - othercoliter->extendCEs);
    455             coliter->CEpos    = coliter->extendCEs + CEsize;
    456         } else {
    457             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
    458             if (CEsize > 0) {
    459                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
    460             }
    461             coliter->toReturn = coliter->CEs +
    462                 (othercoliter->toReturn - othercoliter->CEs);
    463             coliter->CEpos    = coliter->CEs + CEsize;
    464         }
    465 
    466         if (othercoliter->fcdPosition != NULL) {
    467             U_ASSERT(coliter->string != NULL);
    468             coliter->fcdPosition = coliter->string +
    469                 (othercoliter->fcdPosition
    470                 - othercoliter->string);
    471         }
    472         else {
    473             coliter->fcdPosition = NULL;
    474         }
    475         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
    476         coliter->origFlags   = othercoliter->origFlags;
    477         coliter->coll = othercoliter->coll;
    478         this->isDataOwned_ = TRUE;
    479     }
    480 
    481     return *this;
    482 }
    483 
    484 U_NAMESPACE_END
    485 
    486 #endif /* #if !UCONFIG_NO_COLLATION */
    487 
    488 /* eof */
    489