Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2001-2008 IBM and others. All rights reserved.
      4 **********************************************************************
      5 *   Date        Name        Description
      6 *  03/22/2000   helena      Creation.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/stsearch.h"
     15 #include "usrchimp.h"
     16 #include "cmemory.h"
     17 
     18 U_NAMESPACE_BEGIN
     19 
     20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
     21 
     22 // public constructors and destructors -----------------------------------
     23 
     24 StringSearch::StringSearch(const UnicodeString &pattern,
     25                            const UnicodeString &text,
     26                            const Locale        &locale,
     27                                  BreakIterator *breakiter,
     28                                  UErrorCode    &status) :
     29                            SearchIterator(text, breakiter),
     30                            m_collator_(),
     31                            m_pattern_(pattern)
     32 {
     33     if (U_FAILURE(status)) {
     34         m_strsrch_ = NULL;
     35         return;
     36     }
     37 
     38     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     39                               m_text_.getBuffer(), m_text_.length(),
     40                               locale.getName(), (UBreakIterator *)breakiter,
     41                               &status);
     42     uprv_free(m_search_);
     43     m_search_ = NULL;
     44 
     45     // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
     46     // wrapper around the internal collator and rules, which (here) are
     47     // owned by this stringsearch object.  this means 1) it's destructor
     48     // _should not_ delete the ucollator or rules, and 2) changes made
     49     // to the exposed collator (setStrength etc) _should_ modify the
     50     // ucollator.  thus the collator is not a copy-on-write alias, and it
     51     // needs to distinguish itself not merely from 'stand alone' colators
     52     // but also from copy-on-write ones.  it needs additional state, which
     53     // setUCollator should set.
     54 
     55     if (U_SUCCESS(status)) {
     56         // Alias the collator
     57         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
     58         // m_search_ has been created by the base SearchIterator class
     59         m_search_        = m_strsrch_->search;
     60     }
     61 }
     62 
     63 StringSearch::StringSearch(const UnicodeString     &pattern,
     64                            const UnicodeString     &text,
     65                                  RuleBasedCollator *coll,
     66                                  BreakIterator     *breakiter,
     67                                  UErrorCode        &status) :
     68                            SearchIterator(text, breakiter),
     69                            m_collator_(),
     70                            m_pattern_(pattern)
     71 {
     72     if (U_FAILURE(status)) {
     73         m_strsrch_ = NULL;
     74         return;
     75     }
     76     if (coll == NULL) {
     77         status     = U_ILLEGAL_ARGUMENT_ERROR;
     78         m_strsrch_ = NULL;
     79         return;
     80     }
     81     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
     82                                           m_pattern_.length(),
     83                                           m_text_.getBuffer(),
     84                                           m_text_.length(), coll->ucollator,
     85                                           (UBreakIterator *)breakiter,
     86                                           &status);
     87     uprv_free(m_search_);
     88     m_search_ = NULL;
     89 
     90     if (U_SUCCESS(status)) {
     91         // Alias the collator
     92         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
     93         // m_search_ has been created by the base SearchIterator class
     94         m_search_ = m_strsrch_->search;
     95     }
     96 }
     97 
     98 StringSearch::StringSearch(const UnicodeString     &pattern,
     99                                  CharacterIterator &text,
    100                            const Locale            &locale,
    101                                  BreakIterator     *breakiter,
    102                                  UErrorCode        &status) :
    103                            SearchIterator(text, breakiter),
    104                            m_collator_(),
    105                            m_pattern_(pattern)
    106 {
    107     if (U_FAILURE(status)) {
    108         m_strsrch_ = NULL;
    109         return;
    110     }
    111     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
    112                               m_text_.getBuffer(), m_text_.length(),
    113                               locale.getName(), (UBreakIterator *)breakiter,
    114                               &status);
    115     uprv_free(m_search_);
    116     m_search_ = NULL;
    117 
    118     if (U_SUCCESS(status)) {
    119         // Alias the collator
    120         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    121         // m_search_ has been created by the base SearchIterator class
    122         m_search_ = m_strsrch_->search;
    123     }
    124 }
    125 
    126 StringSearch::StringSearch(const UnicodeString     &pattern,
    127                                  CharacterIterator &text,
    128                                  RuleBasedCollator *coll,
    129                                  BreakIterator     *breakiter,
    130                                  UErrorCode        &status) :
    131                            SearchIterator(text, breakiter),
    132                            m_collator_(),
    133                            m_pattern_(pattern)
    134 {
    135     if (U_FAILURE(status)) {
    136         m_strsrch_ = NULL;
    137         return;
    138     }
    139     if (coll == NULL) {
    140         status     = U_ILLEGAL_ARGUMENT_ERROR;
    141         m_strsrch_ = NULL;
    142         return;
    143     }
    144     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    145                                           m_pattern_.length(),
    146                                           m_text_.getBuffer(),
    147                                           m_text_.length(), coll->ucollator,
    148                                           (UBreakIterator *)breakiter,
    149                                           &status);
    150     uprv_free(m_search_);
    151     m_search_ = NULL;
    152 
    153     if (U_SUCCESS(status)) {
    154         // Alias the collator
    155         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    156         // m_search_ has been created by the base SearchIterator class
    157         m_search_ = m_strsrch_->search;
    158     }
    159 }
    160 
    161 StringSearch::StringSearch(const StringSearch &that) :
    162                        SearchIterator(that.m_text_, that.m_breakiterator_),
    163                        m_collator_(),
    164                        m_pattern_(that.m_pattern_)
    165 {
    166     UErrorCode status = U_ZERO_ERROR;
    167 
    168     // Free m_search_ from the superclass
    169     uprv_free(m_search_);
    170     m_search_ = NULL;
    171 
    172     if (that.m_strsrch_ == NULL) {
    173         // This was not a good copy
    174         m_strsrch_ = NULL;
    175     }
    176     else {
    177         // Make a deep copy
    178         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    179                                               m_pattern_.length(),
    180                                               m_text_.getBuffer(),
    181                                               m_text_.length(),
    182                                               that.m_strsrch_->collator,
    183                                              (UBreakIterator *)that.m_breakiterator_,
    184                                               &status);
    185         if (U_SUCCESS(status)) {
    186             // Alias the collator
    187             m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    188             // m_search_ has been created by the base SearchIterator class
    189             m_search_        = m_strsrch_->search;
    190         }
    191     }
    192 }
    193 
    194 StringSearch::~StringSearch()
    195 {
    196     if (m_strsrch_ != NULL) {
    197         usearch_close(m_strsrch_);
    198         m_search_ = NULL;
    199     }
    200 }
    201 
    202 StringSearch *
    203 StringSearch::clone() const {
    204     return new StringSearch(*this);
    205 }
    206 
    207 // operator overloading ---------------------------------------------
    208 StringSearch & StringSearch::operator=(const StringSearch &that)
    209 {
    210     if ((*this) != that) {
    211         UErrorCode status = U_ZERO_ERROR;
    212         m_text_          = that.m_text_;
    213         m_breakiterator_ = that.m_breakiterator_;
    214         m_pattern_       = that.m_pattern_;
    215         // all m_search_ in the parent class is linked up with m_strsrch_
    216         usearch_close(m_strsrch_);
    217         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    218                                               m_pattern_.length(),
    219                                               m_text_.getBuffer(),
    220                                               m_text_.length(),
    221                                               that.m_strsrch_->collator,
    222                                               NULL, &status);
    223         // Check null pointer
    224         if (m_strsrch_ != NULL) {
    225 	        // Alias the collator
    226 	        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    227 	        m_search_ = m_strsrch_->search;
    228         }
    229     }
    230     return *this;
    231 }
    232 
    233 UBool StringSearch::operator==(const SearchIterator &that) const
    234 {
    235     if (this == &that) {
    236         return TRUE;
    237     }
    238     if (SearchIterator::operator ==(that)) {
    239         StringSearch &thatsrch = (StringSearch &)that;
    240         return (this->m_pattern_ == thatsrch.m_pattern_ &&
    241                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
    242     }
    243     return FALSE;
    244 }
    245 
    246 // public get and set methods ----------------------------------------
    247 
    248 void StringSearch::setOffset(int32_t position, UErrorCode &status)
    249 {
    250     // status checked in usearch_setOffset
    251     usearch_setOffset(m_strsrch_, position, &status);
    252 }
    253 
    254 int32_t StringSearch::getOffset(void) const
    255 {
    256     return usearch_getOffset(m_strsrch_);
    257 }
    258 
    259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
    260 {
    261     if (U_SUCCESS(status)) {
    262         m_text_ = text;
    263         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
    264     }
    265 }
    266 
    267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
    268 {
    269     if (U_SUCCESS(status)) {
    270         text.getText(m_text_);
    271         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
    272     }
    273 }
    274 
    275 RuleBasedCollator * StringSearch::getCollator() const
    276 {
    277     return (RuleBasedCollator *)&m_collator_;
    278 }
    279 
    280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
    281 {
    282     if (U_SUCCESS(status)) {
    283         usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
    284         // Alias the collator
    285         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    286     }
    287 }
    288 
    289 void StringSearch::setPattern(const UnicodeString &pattern,
    290                                     UErrorCode    &status)
    291 {
    292     if (U_SUCCESS(status)) {
    293         m_pattern_ = pattern;
    294         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
    295                            &status);
    296     }
    297 }
    298 
    299 const UnicodeString & StringSearch::getPattern() const
    300 {
    301     return m_pattern_;
    302 }
    303 
    304 // public methods ----------------------------------------------------
    305 
    306 void StringSearch::reset()
    307 {
    308     usearch_reset(m_strsrch_);
    309 }
    310 
    311 SearchIterator * StringSearch::safeClone(void) const
    312 {
    313     UErrorCode status = U_ZERO_ERROR;
    314     StringSearch *result = new StringSearch(m_pattern_, m_text_,
    315                                             (RuleBasedCollator *)&m_collator_,
    316                                             m_breakiterator_,
    317                                             status);
    318     /* test for NULL */
    319     if (result == 0) {
    320         status = U_MEMORY_ALLOCATION_ERROR;
    321         return 0;
    322     }
    323     result->setOffset(getOffset(), status);
    324     result->setMatchStart(m_strsrch_->search->matchedIndex);
    325     result->setMatchLength(m_strsrch_->search->matchedLength);
    326     if (U_FAILURE(status)) {
    327         return NULL;
    328     }
    329     return result;
    330 }
    331 
    332 // protected method -------------------------------------------------
    333 
    334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
    335 {
    336     // values passed here are already in the pre-shift position
    337     if (U_SUCCESS(status)) {
    338         if (m_strsrch_->pattern.CELength == 0) {
    339             m_search_->matchedIndex =
    340                                     m_search_->matchedIndex == USEARCH_DONE ?
    341                                     getOffset() : m_search_->matchedIndex + 1;
    342             m_search_->matchedLength = 0;
    343             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    344                            &status);
    345             if (m_search_->matchedIndex == m_search_->textLength) {
    346                 m_search_->matchedIndex = USEARCH_DONE;
    347             }
    348         }
    349         else {
    350             // looking at usearch.cpp, this part is shifted out to
    351             // StringSearch instead of SearchIterator because m_strsrch_ is
    352             // not accessible in SearchIterator
    353 #if 0
    354             if (position + m_strsrch_->pattern.defaultShiftSize
    355                 > m_search_->textLength) {
    356                 setMatchNotFound();
    357                 return USEARCH_DONE;
    358             }
    359 #endif
    360             if (m_search_->matchedLength <= 0) {
    361                 // the flipping direction issue has already been handled
    362                 // in next()
    363                 // for boundary check purposes. this will ensure that the
    364                 // next match will not preceed the current offset
    365                 // note search->matchedIndex will always be set to something
    366                 // in the code
    367                 m_search_->matchedIndex = position - 1;
    368             }
    369 
    370             ucol_setOffset(m_strsrch_->textIter, position, &status);
    371 
    372 #if 0
    373             for (;;) {
    374                 if (m_search_->isCanonicalMatch) {
    375                     // can't use exact here since extra accents are allowed.
    376                     usearch_handleNextCanonical(m_strsrch_, &status);
    377                 }
    378                 else {
    379                     usearch_handleNextExact(m_strsrch_, &status);
    380                 }
    381                 if (U_FAILURE(status)) {
    382                     return USEARCH_DONE;
    383                 }
    384                 if (m_breakiterator_ == NULL
    385 #if !UCONFIG_NO_BREAK_ITERATION
    386                     ||
    387                     m_search_->matchedIndex == USEARCH_DONE ||
    388                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    389                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
    390                                                   m_search_->matchedLength))
    391 #endif
    392                 ) {
    393                     if (m_search_->matchedIndex == USEARCH_DONE) {
    394                         ucol_setOffset(m_strsrch_->textIter,
    395                                        m_search_->textLength, &status);
    396                     }
    397                     else {
    398                         ucol_setOffset(m_strsrch_->textIter,
    399                                        m_search_->matchedIndex, &status);
    400                     }
    401                     return m_search_->matchedIndex;
    402                 }
    403             }
    404 #else
    405             // if m_strsrch_->breakIter is always the same as m_breakiterator_
    406             // then we don't need to check the match boundaries here because
    407             // usearch_handleNextXXX will already have done it.
    408             if (m_search_->isCanonicalMatch) {
    409             	// *could* actually use exact here 'cause no extra accents allowed...
    410             	usearch_handleNextCanonical(m_strsrch_, &status);
    411             } else {
    412             	usearch_handleNextExact(m_strsrch_, &status);
    413             }
    414 
    415             if (U_FAILURE(status)) {
    416             	return USEARCH_DONE;
    417             }
    418 
    419             if (m_search_->matchedIndex == USEARCH_DONE) {
    420             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
    421             } else {
    422             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
    423             }
    424 
    425             return m_search_->matchedIndex;
    426 #endif
    427         }
    428     }
    429     return USEARCH_DONE;
    430 }
    431 
    432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
    433 {
    434     // values passed here are already in the pre-shift position
    435     if (U_SUCCESS(status)) {
    436         if (m_strsrch_->pattern.CELength == 0) {
    437             m_search_->matchedIndex =
    438                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
    439                    m_search_->matchedIndex);
    440             if (m_search_->matchedIndex == 0) {
    441                 setMatchNotFound();
    442             }
    443             else {
    444                 m_search_->matchedIndex --;
    445                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    446                                &status);
    447                 m_search_->matchedLength = 0;
    448             }
    449         }
    450         else {
    451             // looking at usearch.cpp, this part is shifted out to
    452             // StringSearch instead of SearchIterator because m_strsrch_ is
    453             // not accessible in SearchIterator
    454 #if 0
    455             if (!m_search_->isOverlap &&
    456                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
    457                 setMatchNotFound();
    458                 return USEARCH_DONE;
    459             }
    460 
    461             for (;;) {
    462                 if (m_search_->isCanonicalMatch) {
    463                     // can't use exact here since extra accents are allowed.
    464                     usearch_handlePreviousCanonical(m_strsrch_, &status);
    465                 }
    466                 else {
    467                     usearch_handlePreviousExact(m_strsrch_, &status);
    468                 }
    469                 if (U_FAILURE(status)) {
    470                     return USEARCH_DONE;
    471                 }
    472                 if (m_breakiterator_ == NULL
    473 #if !UCONFIG_NO_BREAK_ITERATION
    474                     ||
    475                     m_search_->matchedIndex == USEARCH_DONE ||
    476                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    477                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
    478                                                   m_search_->matchedLength))
    479 #endif
    480                 ) {
    481                     return m_search_->matchedIndex;
    482                 }
    483             }
    484 #else
    485             ucol_setOffset(m_strsrch_->textIter, position, &status);
    486 
    487             if (m_search_->isCanonicalMatch) {
    488             	// *could* use exact match here since extra accents *not* allowed!
    489             	usearch_handlePreviousCanonical(m_strsrch_, &status);
    490             } else {
    491             	usearch_handlePreviousExact(m_strsrch_, &status);
    492             }
    493 
    494             if (U_FAILURE(status)) {
    495             	return USEARCH_DONE;
    496             }
    497 
    498             return m_search_->matchedIndex;
    499 #endif
    500         }
    501 
    502         return m_search_->matchedIndex;
    503     }
    504     return USEARCH_DONE;
    505 }
    506 
    507 U_NAMESPACE_END
    508 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
    510