Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2001-2014 IBM and others. All rights reserved.
      4 **********************************************************************
      5 *   Date        Name        Description
      6 *  03/22/2000   helena      Creation.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/stsearch.h"
     15 #include "usrchimp.h"
     16 #include "cmemory.h"
     17 
     18 U_NAMESPACE_BEGIN
     19 
     20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
     21 
     22 // public constructors and destructors -----------------------------------
     23 
     24 StringSearch::StringSearch(const UnicodeString &pattern,
     25                            const UnicodeString &text,
     26                            const Locale        &locale,
     27                                  BreakIterator *breakiter,
     28                                  UErrorCode    &status) :
     29                            SearchIterator(text, breakiter),
     30                            m_pattern_(pattern)
     31 {
     32     if (U_FAILURE(status)) {
     33         m_strsrch_ = NULL;
     34         return;
     35     }
     36 
     37     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     38                               m_text_.getBuffer(), m_text_.length(),
     39                               locale.getName(), (UBreakIterator *)breakiter,
     40                               &status);
     41     uprv_free(m_search_);
     42     m_search_ = NULL;
     43 
     44     if (U_SUCCESS(status)) {
     45         // m_search_ has been created by the base SearchIterator class
     46         m_search_        = m_strsrch_->search;
     47     }
     48 }
     49 
     50 StringSearch::StringSearch(const UnicodeString     &pattern,
     51                            const UnicodeString     &text,
     52                                  RuleBasedCollator *coll,
     53                                  BreakIterator     *breakiter,
     54                                  UErrorCode        &status) :
     55                            SearchIterator(text, breakiter),
     56                            m_pattern_(pattern)
     57 {
     58     if (U_FAILURE(status)) {
     59         m_strsrch_ = NULL;
     60         return;
     61     }
     62     if (coll == NULL) {
     63         status     = U_ILLEGAL_ARGUMENT_ERROR;
     64         m_strsrch_ = NULL;
     65         return;
     66     }
     67     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
     68                                           m_pattern_.length(),
     69                                           m_text_.getBuffer(),
     70                                           m_text_.length(), coll->toUCollator(),
     71                                           (UBreakIterator *)breakiter,
     72                                           &status);
     73     uprv_free(m_search_);
     74     m_search_ = NULL;
     75 
     76     if (U_SUCCESS(status)) {
     77         // m_search_ has been created by the base SearchIterator class
     78         m_search_ = m_strsrch_->search;
     79     }
     80 }
     81 
     82 StringSearch::StringSearch(const UnicodeString     &pattern,
     83                                  CharacterIterator &text,
     84                            const Locale            &locale,
     85                                  BreakIterator     *breakiter,
     86                                  UErrorCode        &status) :
     87                            SearchIterator(text, breakiter),
     88                            m_pattern_(pattern)
     89 {
     90     if (U_FAILURE(status)) {
     91         m_strsrch_ = NULL;
     92         return;
     93     }
     94     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     95                               m_text_.getBuffer(), m_text_.length(),
     96                               locale.getName(), (UBreakIterator *)breakiter,
     97                               &status);
     98     uprv_free(m_search_);
     99     m_search_ = NULL;
    100 
    101     if (U_SUCCESS(status)) {
    102         // m_search_ has been created by the base SearchIterator class
    103         m_search_ = m_strsrch_->search;
    104     }
    105 }
    106 
    107 StringSearch::StringSearch(const UnicodeString     &pattern,
    108                                  CharacterIterator &text,
    109                                  RuleBasedCollator *coll,
    110                                  BreakIterator     *breakiter,
    111                                  UErrorCode        &status) :
    112                            SearchIterator(text, breakiter),
    113                            m_pattern_(pattern)
    114 {
    115     if (U_FAILURE(status)) {
    116         m_strsrch_ = NULL;
    117         return;
    118     }
    119     if (coll == NULL) {
    120         status     = U_ILLEGAL_ARGUMENT_ERROR;
    121         m_strsrch_ = NULL;
    122         return;
    123     }
    124     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    125                                           m_pattern_.length(),
    126                                           m_text_.getBuffer(),
    127                                           m_text_.length(), coll->toUCollator(),
    128                                           (UBreakIterator *)breakiter,
    129                                           &status);
    130     uprv_free(m_search_);
    131     m_search_ = NULL;
    132 
    133     if (U_SUCCESS(status)) {
    134         // m_search_ has been created by the base SearchIterator class
    135         m_search_ = m_strsrch_->search;
    136     }
    137 }
    138 
    139 StringSearch::StringSearch(const StringSearch &that) :
    140                        SearchIterator(that.m_text_, that.m_breakiterator_),
    141                        m_pattern_(that.m_pattern_)
    142 {
    143     UErrorCode status = U_ZERO_ERROR;
    144 
    145     // Free m_search_ from the superclass
    146     uprv_free(m_search_);
    147     m_search_ = NULL;
    148 
    149     if (that.m_strsrch_ == NULL) {
    150         // This was not a good copy
    151         m_strsrch_ = NULL;
    152     }
    153     else {
    154         // Make a deep copy
    155         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    156                                               m_pattern_.length(),
    157                                               m_text_.getBuffer(),
    158                                               m_text_.length(),
    159                                               that.m_strsrch_->collator,
    160                                              (UBreakIterator *)that.m_breakiterator_,
    161                                               &status);
    162         if (U_SUCCESS(status)) {
    163             // m_search_ has been created by the base SearchIterator class
    164             m_search_        = m_strsrch_->search;
    165         }
    166     }
    167 }
    168 
    169 StringSearch::~StringSearch()
    170 {
    171     if (m_strsrch_ != NULL) {
    172         usearch_close(m_strsrch_);
    173         m_search_ = NULL;
    174     }
    175 }
    176 
    177 StringSearch *
    178 StringSearch::clone() const {
    179     return new StringSearch(*this);
    180 }
    181 
    182 // operator overloading ---------------------------------------------
    183 StringSearch & StringSearch::operator=(const StringSearch &that)
    184 {
    185     if ((*this) != that) {
    186         UErrorCode status = U_ZERO_ERROR;
    187         m_text_          = that.m_text_;
    188         m_breakiterator_ = that.m_breakiterator_;
    189         m_pattern_       = that.m_pattern_;
    190         // all m_search_ in the parent class is linked up with m_strsrch_
    191         usearch_close(m_strsrch_);
    192         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    193                                               m_pattern_.length(),
    194                                               m_text_.getBuffer(),
    195                                               m_text_.length(),
    196                                               that.m_strsrch_->collator,
    197                                               NULL, &status);
    198         // Check null pointer
    199         if (m_strsrch_ != NULL) {
    200             m_search_ = m_strsrch_->search;
    201         }
    202     }
    203     return *this;
    204 }
    205 
    206 UBool StringSearch::operator==(const SearchIterator &that) const
    207 {
    208     if (this == &that) {
    209         return TRUE;
    210     }
    211     if (SearchIterator::operator ==(that)) {
    212         StringSearch &thatsrch = (StringSearch &)that;
    213         return (this->m_pattern_ == thatsrch.m_pattern_ &&
    214                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
    215     }
    216     return FALSE;
    217 }
    218 
    219 // public get and set methods ----------------------------------------
    220 
    221 void StringSearch::setOffset(int32_t position, UErrorCode &status)
    222 {
    223     // status checked in usearch_setOffset
    224     usearch_setOffset(m_strsrch_, position, &status);
    225 }
    226 
    227 int32_t StringSearch::getOffset(void) const
    228 {
    229     return usearch_getOffset(m_strsrch_);
    230 }
    231 
    232 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
    233 {
    234     if (U_SUCCESS(status)) {
    235         m_text_ = text;
    236         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
    237     }
    238 }
    239 
    240 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
    241 {
    242     if (U_SUCCESS(status)) {
    243         text.getText(m_text_);
    244         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
    245     }
    246 }
    247 
    248 RuleBasedCollator * StringSearch::getCollator() const
    249 {
    250     // Note the const_cast. It would be cleaner if this const method returned a const collator.
    251     return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
    252 }
    253 
    254 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
    255 {
    256     if (U_SUCCESS(status)) {
    257         usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
    258     }
    259 }
    260 
    261 void StringSearch::setPattern(const UnicodeString &pattern,
    262                                     UErrorCode    &status)
    263 {
    264     if (U_SUCCESS(status)) {
    265         m_pattern_ = pattern;
    266         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
    267                            &status);
    268     }
    269 }
    270 
    271 const UnicodeString & StringSearch::getPattern() const
    272 {
    273     return m_pattern_;
    274 }
    275 
    276 // public methods ----------------------------------------------------
    277 
    278 void StringSearch::reset()
    279 {
    280     usearch_reset(m_strsrch_);
    281 }
    282 
    283 SearchIterator * StringSearch::safeClone(void) const
    284 {
    285     UErrorCode status = U_ZERO_ERROR;
    286     StringSearch *result = new StringSearch(m_pattern_, m_text_,
    287                                             getCollator(),
    288                                             m_breakiterator_,
    289                                             status);
    290     /* test for NULL */
    291     if (result == 0) {
    292         status = U_MEMORY_ALLOCATION_ERROR;
    293         return 0;
    294     }
    295     result->setOffset(getOffset(), status);
    296     result->setMatchStart(m_strsrch_->search->matchedIndex);
    297     result->setMatchLength(m_strsrch_->search->matchedLength);
    298     if (U_FAILURE(status)) {
    299         return NULL;
    300     }
    301     return result;
    302 }
    303 
    304 // protected method -------------------------------------------------
    305 
    306 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
    307 {
    308     // values passed here are already in the pre-shift position
    309     if (U_SUCCESS(status)) {
    310         if (m_strsrch_->pattern.CELength == 0) {
    311             m_search_->matchedIndex =
    312                                     m_search_->matchedIndex == USEARCH_DONE ?
    313                                     getOffset() : m_search_->matchedIndex + 1;
    314             m_search_->matchedLength = 0;
    315             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    316                            &status);
    317             if (m_search_->matchedIndex == m_search_->textLength) {
    318                 m_search_->matchedIndex = USEARCH_DONE;
    319             }
    320         }
    321         else {
    322             // looking at usearch.cpp, this part is shifted out to
    323             // StringSearch instead of SearchIterator because m_strsrch_ is
    324             // not accessible in SearchIterator
    325 #if 0
    326             if (position + m_strsrch_->pattern.defaultShiftSize
    327                 > m_search_->textLength) {
    328                 setMatchNotFound();
    329                 return USEARCH_DONE;
    330             }
    331 #endif
    332             if (m_search_->matchedLength <= 0) {
    333                 // the flipping direction issue has already been handled
    334                 // in next()
    335                 // for boundary check purposes. this will ensure that the
    336                 // next match will not preceed the current offset
    337                 // note search->matchedIndex will always be set to something
    338                 // in the code
    339                 m_search_->matchedIndex = position - 1;
    340             }
    341 
    342             ucol_setOffset(m_strsrch_->textIter, position, &status);
    343 
    344 #if 0
    345             for (;;) {
    346                 if (m_search_->isCanonicalMatch) {
    347                     // can't use exact here since extra accents are allowed.
    348                     usearch_handleNextCanonical(m_strsrch_, &status);
    349                 }
    350                 else {
    351                     usearch_handleNextExact(m_strsrch_, &status);
    352                 }
    353                 if (U_FAILURE(status)) {
    354                     return USEARCH_DONE;
    355                 }
    356                 if (m_breakiterator_ == NULL
    357 #if !UCONFIG_NO_BREAK_ITERATION
    358                     ||
    359                     m_search_->matchedIndex == USEARCH_DONE ||
    360                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    361                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
    362                                                   m_search_->matchedLength))
    363 #endif
    364                 ) {
    365                     if (m_search_->matchedIndex == USEARCH_DONE) {
    366                         ucol_setOffset(m_strsrch_->textIter,
    367                                        m_search_->textLength, &status);
    368                     }
    369                     else {
    370                         ucol_setOffset(m_strsrch_->textIter,
    371                                        m_search_->matchedIndex, &status);
    372                     }
    373                     return m_search_->matchedIndex;
    374                 }
    375             }
    376 #else
    377             // if m_strsrch_->breakIter is always the same as m_breakiterator_
    378             // then we don't need to check the match boundaries here because
    379             // usearch_handleNextXXX will already have done it.
    380             if (m_search_->isCanonicalMatch) {
    381             	// *could* actually use exact here 'cause no extra accents allowed...
    382             	usearch_handleNextCanonical(m_strsrch_, &status);
    383             } else {
    384             	usearch_handleNextExact(m_strsrch_, &status);
    385             }
    386 
    387             if (U_FAILURE(status)) {
    388             	return USEARCH_DONE;
    389             }
    390 
    391             if (m_search_->matchedIndex == USEARCH_DONE) {
    392             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
    393             } else {
    394             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
    395             }
    396 
    397             return m_search_->matchedIndex;
    398 #endif
    399         }
    400     }
    401     return USEARCH_DONE;
    402 }
    403 
    404 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
    405 {
    406     // values passed here are already in the pre-shift position
    407     if (U_SUCCESS(status)) {
    408         if (m_strsrch_->pattern.CELength == 0) {
    409             m_search_->matchedIndex =
    410                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
    411                    m_search_->matchedIndex);
    412             if (m_search_->matchedIndex == 0) {
    413                 setMatchNotFound();
    414             }
    415             else {
    416                 m_search_->matchedIndex --;
    417                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    418                                &status);
    419                 m_search_->matchedLength = 0;
    420             }
    421         }
    422         else {
    423             // looking at usearch.cpp, this part is shifted out to
    424             // StringSearch instead of SearchIterator because m_strsrch_ is
    425             // not accessible in SearchIterator
    426 #if 0
    427             if (!m_search_->isOverlap &&
    428                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
    429                 setMatchNotFound();
    430                 return USEARCH_DONE;
    431             }
    432 
    433             for (;;) {
    434                 if (m_search_->isCanonicalMatch) {
    435                     // can't use exact here since extra accents are allowed.
    436                     usearch_handlePreviousCanonical(m_strsrch_, &status);
    437                 }
    438                 else {
    439                     usearch_handlePreviousExact(m_strsrch_, &status);
    440                 }
    441                 if (U_FAILURE(status)) {
    442                     return USEARCH_DONE;
    443                 }
    444                 if (m_breakiterator_ == NULL
    445 #if !UCONFIG_NO_BREAK_ITERATION
    446                     ||
    447                     m_search_->matchedIndex == USEARCH_DONE ||
    448                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    449                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
    450                                                   m_search_->matchedLength))
    451 #endif
    452                 ) {
    453                     return m_search_->matchedIndex;
    454                 }
    455             }
    456 #else
    457             ucol_setOffset(m_strsrch_->textIter, position, &status);
    458 
    459             if (m_search_->isCanonicalMatch) {
    460             	// *could* use exact match here since extra accents *not* allowed!
    461             	usearch_handlePreviousCanonical(m_strsrch_, &status);
    462             } else {
    463             	usearch_handlePreviousExact(m_strsrch_, &status);
    464             }
    465 
    466             if (U_FAILURE(status)) {
    467             	return USEARCH_DONE;
    468             }
    469 
    470             return m_search_->matchedIndex;
    471 #endif
    472         }
    473 
    474         return m_search_->matchedIndex;
    475     }
    476     return USEARCH_DONE;
    477 }
    478 
    479 U_NAMESPACE_END
    480 
    481 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
    482