Home | History | Annotate | Download | only in i18n
      1 /*
      2 *******************************************************************************
      3 * Copyright (C) 1996-2009, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 
      8 /*
      9 * File coleitr.cpp
     10 *
     11 *
     12 *
     13 * Created by: Helena Shih
     14 *
     15 * Modification History:
     16 *
     17 *  Date      Name        Description
     18 *
     19 *  6/23/97   helena      Adding comments to make code more readable.
     20 * 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
     21 * 12/10/99   aliu        Ported Thai collation support from Java.
     22 * 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
     23 * 02/19/01   swquek      Removed CollationElementsIterator() since it is
     24 *                        private constructor and no calls are made to it
     25 */
     26 
     27 #include "unicode/utypes.h"
     28 
     29 #if !UCONFIG_NO_COLLATION
     30 
     31 #include "unicode/coleitr.h"
     32 #include "unicode/ustring.h"
     33 #include "ucol_imp.h"
     34 #include "cmemory.h"
     35 
     36 
     37 /* Constants --------------------------------------------------------------- */
     38 
     39 U_NAMESPACE_BEGIN
     40 
     41 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
     42 
     43 /* CollationElementIterator public constructor/destructor ------------------ */
     44 
     45 CollationElementIterator::CollationElementIterator(
     46                                          const CollationElementIterator& other)
     47                                          : UObject(other), isDataOwned_(TRUE)
     48 {
     49     UErrorCode status = U_ZERO_ERROR;
     50     m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
     51                                 &status);
     52 
     53     *this = other;
     54 }
     55 
     56 CollationElementIterator::~CollationElementIterator()
     57 {
     58     if (isDataOwned_) {
     59         ucol_closeElements(m_data_);
     60     }
     61 }
     62 
     63 /* CollationElementIterator public methods --------------------------------- */
     64 
     65 int32_t CollationElementIterator::getOffset() const
     66 {
     67     return ucol_getOffset(m_data_);
     68 }
     69 
     70 /**
     71 * Get the ordering priority of the next character in the string.
     72 * @return the next character's ordering. Returns NULLORDER if an error has
     73 *         occured or if the end of string has been reached
     74 */
     75 int32_t CollationElementIterator::next(UErrorCode& status)
     76 {
     77     return ucol_next(m_data_, &status);
     78 }
     79 
     80 UBool CollationElementIterator::operator!=(
     81                                   const CollationElementIterator& other) const
     82 {
     83     return !(*this == other);
     84 }
     85 
     86 UBool CollationElementIterator::operator==(
     87                                     const CollationElementIterator& that) const
     88 {
     89     if (this == &that || m_data_ == that.m_data_) {
     90         return TRUE;
     91     }
     92 
     93     // option comparison
     94     if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
     95     {
     96         return FALSE;
     97     }
     98 
     99     // the constructor and setText always sets a length
    100     // and we only compare the string not the contents of the normalization
    101     // buffer
    102     int thislength = m_data_->iteratordata_.endp -
    103                      m_data_->iteratordata_.string;
    104     int thatlength = that.m_data_->iteratordata_.endp -
    105                      that.m_data_->iteratordata_.string;
    106 
    107     if (thislength != thatlength) {
    108         return FALSE;
    109     }
    110 
    111     if (uprv_memcmp(m_data_->iteratordata_.string,
    112                     that.m_data_->iteratordata_.string,
    113                     thislength * U_SIZEOF_UCHAR) != 0) {
    114         return FALSE;
    115     }
    116     if (getOffset() != that.getOffset()) {
    117         return FALSE;
    118     }
    119 
    120     // checking normalization buffer
    121     if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
    122         if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
    123             return FALSE;
    124         }
    125         // both are in the normalization buffer
    126         if (m_data_->iteratordata_.pos
    127             - m_data_->iteratordata_.writableBuffer
    128             != that.m_data_->iteratordata_.pos
    129             - that.m_data_->iteratordata_.writableBuffer) {
    130             // not in the same position in the normalization buffer
    131             return FALSE;
    132         }
    133     }
    134     else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
    135         return FALSE;
    136     }
    137     // checking ce position
    138     return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
    139             == (that.m_data_->iteratordata_.CEpos
    140                                         - that.m_data_->iteratordata_.CEs);
    141 }
    142 
    143 /**
    144 * Get the ordering priority of the previous collation element in the string.
    145 * @param status the error code status.
    146 * @return the previous element's ordering. Returns NULLORDER if an error has
    147 *         occured or if the start of string has been reached.
    148 */
    149 int32_t CollationElementIterator::previous(UErrorCode& status)
    150 {
    151     return ucol_previous(m_data_, &status);
    152 }
    153 
    154 /**
    155 * Resets the cursor to the beginning of the string.
    156 */
    157 void CollationElementIterator::reset()
    158 {
    159     ucol_reset(m_data_);
    160 }
    161 
    162 void CollationElementIterator::setOffset(int32_t newOffset,
    163                                          UErrorCode& status)
    164 {
    165     ucol_setOffset(m_data_, newOffset, &status);
    166 }
    167 
    168 /**
    169 * Sets the source to the new source string.
    170 */
    171 void CollationElementIterator::setText(const UnicodeString& source,
    172                                        UErrorCode& status)
    173 {
    174     if (U_FAILURE(status)) {
    175         return;
    176     }
    177 
    178     int32_t length = source.length();
    179     UChar *string = NULL;
    180     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    181         uprv_free(m_data_->iteratordata_.string);
    182     }
    183     m_data_->isWritable = TRUE;
    184     if (length > 0) {
    185         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    186         /* test for NULL */
    187         if (string == NULL) {
    188             status = U_MEMORY_ALLOCATION_ERROR;
    189             return;
    190         }
    191         u_memcpy(string, source.getBuffer(), length);
    192     }
    193     else {
    194         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    195         /* test for NULL */
    196         if (string == NULL) {
    197             status = U_MEMORY_ALLOCATION_ERROR;
    198             return;
    199         }
    200         *string = 0;
    201     }
    202     /* Free offsetBuffer before initializing it. */
    203     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    204     uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
    205         &m_data_->iteratordata_);
    206 
    207     m_data_->reset_   = TRUE;
    208 }
    209 
    210 // Sets the source to the new character iterator.
    211 void CollationElementIterator::setText(CharacterIterator& source,
    212                                        UErrorCode& status)
    213 {
    214     if (U_FAILURE(status))
    215         return;
    216 
    217     int32_t length = source.getLength();
    218     UChar *buffer = NULL;
    219 
    220     if (length == 0) {
    221         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    222         /* test for NULL */
    223         if (buffer == NULL) {
    224             status = U_MEMORY_ALLOCATION_ERROR;
    225             return;
    226         }
    227         *buffer = 0;
    228     }
    229     else {
    230         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    231         /* test for NULL */
    232         if (buffer == NULL) {
    233             status = U_MEMORY_ALLOCATION_ERROR;
    234             return;
    235         }
    236         /*
    237         Using this constructor will prevent buffer from being removed when
    238         string gets removed
    239         */
    240         UnicodeString string;
    241         source.getText(string);
    242         u_memcpy(buffer, string.getBuffer(), length);
    243     }
    244 
    245     if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
    246         uprv_free(m_data_->iteratordata_.string);
    247     }
    248     m_data_->isWritable = TRUE;
    249     /* Free offsetBuffer before initializing it. */
    250     ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
    251     uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
    252         &m_data_->iteratordata_);
    253     m_data_->reset_   = TRUE;
    254 }
    255 
    256 int32_t CollationElementIterator::strengthOrder(int32_t order) const
    257 {
    258     UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
    259     // Mask off the unwanted differences.
    260     if (s == UCOL_PRIMARY) {
    261         order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
    262     }
    263     else if (s == UCOL_SECONDARY) {
    264         order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
    265     }
    266 
    267     return order;
    268 }
    269 
    270 /* CollationElementIterator private constructors/destructors --------------- */
    271 
    272 /**
    273 * This is the "real" constructor for this class; it constructs an iterator
    274 * over the source text using the specified collator
    275 */
    276 CollationElementIterator::CollationElementIterator(
    277                                                const UnicodeString& sourceText,
    278                                                const RuleBasedCollator* order,
    279                                                UErrorCode& status)
    280                                                : isDataOwned_(TRUE)
    281 {
    282     if (U_FAILURE(status)) {
    283         return;
    284     }
    285 
    286     int32_t length = sourceText.length();
    287     UChar *string = NULL;
    288 
    289     if (length > 0) {
    290         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    291         /* test for NULL */
    292         if (string == NULL) {
    293             status = U_MEMORY_ALLOCATION_ERROR;
    294             return;
    295         }
    296         /*
    297         Using this constructor will prevent buffer from being removed when
    298         string gets removed
    299         */
    300         u_memcpy(string, sourceText.getBuffer(), length);
    301     }
    302     else {
    303         string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    304         /* test for NULL */
    305         if (string == NULL) {
    306             status = U_MEMORY_ALLOCATION_ERROR;
    307             return;
    308         }
    309         *string = 0;
    310     }
    311     m_data_ = ucol_openElements(order->ucollator, string, length, &status);
    312 
    313     /* Test for buffer overflows */
    314     if (U_FAILURE(status)) {
    315         return;
    316     }
    317     m_data_->isWritable = TRUE;
    318 }
    319 
    320 /**
    321 * This is the "real" constructor for this class; it constructs an iterator over
    322 * the source text using the specified collator
    323 */
    324 CollationElementIterator::CollationElementIterator(
    325                                            const CharacterIterator& sourceText,
    326                                            const RuleBasedCollator* order,
    327                                            UErrorCode& status)
    328                                            : isDataOwned_(TRUE)
    329 {
    330     if (U_FAILURE(status))
    331         return;
    332 
    333     // **** should I just drop this test? ****
    334     /*
    335     if ( sourceText.endIndex() != 0 )
    336     {
    337         // A CollationElementIterator is really a two-layered beast.
    338         // Internally it uses a Normalizer to munge the source text into a form
    339         // where all "composed" Unicode characters (such as \u00FC) are split into a
    340         // normal character and a combining accent character.
    341         // Afterward, CollationElementIterator does its own processing to handle
    342         // expanding and contracting collation sequences, ignorables, and so on.
    343 
    344         Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
    345                                 ? Normalizer::NO_OP : order->getDecomposition();
    346 
    347         text = new Normalizer(sourceText, decomp);
    348         if (text == NULL)
    349         status = U_MEMORY_ALLOCATION_ERROR;
    350     }
    351     */
    352     int32_t length = sourceText.getLength();
    353     UChar *buffer;
    354     if (length > 0) {
    355         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
    356         /* test for NULL */
    357         if (buffer == NULL) {
    358             status = U_MEMORY_ALLOCATION_ERROR;
    359             return;
    360         }
    361         /*
    362         Using this constructor will prevent buffer from being removed when
    363         string gets removed
    364         */
    365         UnicodeString string(buffer, length, length);
    366         ((CharacterIterator &)sourceText).getText(string);
    367         const UChar *temp = string.getBuffer();
    368         u_memcpy(buffer, temp, length);
    369     }
    370     else {
    371         buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
    372         /* test for NULL */
    373         if (buffer == NULL) {
    374             status = U_MEMORY_ALLOCATION_ERROR;
    375             return;
    376         }
    377         *buffer = 0;
    378     }
    379     m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
    380 
    381     /* Test for buffer overflows */
    382     if (U_FAILURE(status)) {
    383         return;
    384     }
    385     m_data_->isWritable = TRUE;
    386 }
    387 
    388 /* CollationElementIterator protected methods ----------------------------- */
    389 
    390 const CollationElementIterator& CollationElementIterator::operator=(
    391                                          const CollationElementIterator& other)
    392 {
    393     if (this != &other)
    394     {
    395         UCollationElements *ucolelem      = this->m_data_;
    396         UCollationElements *otherucolelem = other.m_data_;
    397         collIterate        *coliter       = &(ucolelem->iteratordata_);
    398         collIterate        *othercoliter  = &(otherucolelem->iteratordata_);
    399         int                length         = 0;
    400 
    401         // checking only UCOL_ITER_HASLEN is not enough here as we may be in
    402         // the normalization buffer
    403         length = othercoliter->endp - othercoliter->string;
    404 
    405         ucolelem->reset_         = otherucolelem->reset_;
    406         ucolelem->isWritable     = TRUE;
    407 
    408         /* create a duplicate of string */
    409         if (length > 0) {
    410             coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
    411             if(coliter->string != NULL) {
    412                 uprv_memcpy(coliter->string, othercoliter->string,
    413                     length * U_SIZEOF_UCHAR);
    414             } else { // Error: couldn't allocate memory. No copying should be done
    415                 length = 0;
    416             }
    417         }
    418         else {
    419             coliter->string = NULL;
    420         }
    421 
    422         /* start and end of string */
    423         coliter->endp = coliter->string + length;
    424 
    425         /* handle writable buffer here */
    426 
    427         if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
    428             uint32_t wlength = u_strlen(othercoliter->writableBuffer) + 1;
    429             if (wlength < coliter->writableBufSize) {
    430                 uprv_memcpy(coliter->stackWritableBuffer,
    431                     othercoliter->stackWritableBuffer,
    432                     wlength * U_SIZEOF_UCHAR);
    433             }
    434             else {
    435                 if (coliter->writableBuffer != coliter->stackWritableBuffer) {
    436                     uprv_free(coliter->writableBuffer);
    437                 }
    438                 coliter->writableBuffer = (UChar *)uprv_malloc(
    439                     wlength * U_SIZEOF_UCHAR);
    440                 if(coliter->writableBuffer != NULL) {
    441                     uprv_memcpy(coliter->writableBuffer,
    442                         othercoliter->writableBuffer,
    443                         wlength * U_SIZEOF_UCHAR);
    444                     coliter->writableBufSize = wlength;
    445                 } else { // Error: couldn't allocate memory for writableBuffer
    446                     coliter->writableBufSize = 0;
    447                 }
    448             }
    449         }
    450 
    451         /* current position */
    452         if (othercoliter->pos >= othercoliter->string &&
    453             othercoliter->pos <= othercoliter->endp)
    454         {
    455             coliter->pos = coliter->string +
    456                 (othercoliter->pos - othercoliter->string);
    457         }
    458         else if (coliter->writableBuffer != NULL) {
    459             coliter->pos = coliter->writableBuffer +
    460                 (othercoliter->pos - othercoliter->writableBuffer);
    461         }
    462         else {
    463             // Error: couldn't allocate memory for writableBuffer
    464             coliter->pos = NULL;
    465         }
    466 
    467         /* CE buffer */
    468         int32_t CEsize;
    469         if (coliter->extendCEs) {
    470             uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
    471             CEsize = sizeof(othercoliter->extendCEs);
    472             if (CEsize > 0) {
    473                 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
    474                 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
    475             }
    476             coliter->toReturn = coliter->extendCEs +
    477                 (othercoliter->toReturn - othercoliter->extendCEs);
    478             coliter->CEpos    = coliter->extendCEs + CEsize;
    479         } else {
    480             CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
    481             if (CEsize > 0) {
    482                 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
    483             }
    484             coliter->toReturn = coliter->CEs +
    485                 (othercoliter->toReturn - othercoliter->CEs);
    486             coliter->CEpos    = coliter->CEs + CEsize;
    487         }
    488 
    489         if (othercoliter->fcdPosition != NULL) {
    490             coliter->fcdPosition = coliter->string +
    491                 (othercoliter->fcdPosition
    492                 - othercoliter->string);
    493         }
    494         else {
    495             coliter->fcdPosition = NULL;
    496         }
    497         coliter->flags       = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
    498         coliter->origFlags   = othercoliter->origFlags;
    499         coliter->coll = othercoliter->coll;
    500         this->isDataOwned_ = TRUE;
    501     }
    502 
    503     return *this;
    504 }
    505 
    506 U_NAMESPACE_END
    507 
    508 #endif /* #if !UCONFIG_NO_COLLATION */
    509 
    510 /* eof */
    511