Home | History | Annotate | Download | only in i18n
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2001-2014 IBM and others. All rights reserved.
      6 **********************************************************************
      7 *   Date        Name        Description
      8 *  03/22/2000   helena      Creation.
      9 **********************************************************************
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     15 
     16 #include "unicode/stsearch.h"
     17 #include "usrchimp.h"
     18 #include "cmemory.h"
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
     23 
     24 // public constructors and destructors -----------------------------------
     25 
     26 StringSearch::StringSearch(const UnicodeString &pattern,
     27                            const UnicodeString &text,
     28                            const Locale        &locale,
     29                                  BreakIterator *breakiter,
     30                                  UErrorCode    &status) :
     31                            SearchIterator(text, breakiter),
     32                            m_pattern_(pattern)
     33 {
     34     if (U_FAILURE(status)) {
     35         m_strsrch_ = NULL;
     36         return;
     37     }
     38 
     39     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     40                               m_text_.getBuffer(), m_text_.length(),
     41                               locale.getName(), (UBreakIterator *)breakiter,
     42                               &status);
     43     uprv_free(m_search_);
     44     m_search_ = NULL;
     45 
     46     if (U_SUCCESS(status)) {
     47         // m_search_ has been created by the base SearchIterator class
     48         m_search_        = m_strsrch_->search;
     49     }
     50 }
     51 
     52 StringSearch::StringSearch(const UnicodeString     &pattern,
     53                            const UnicodeString     &text,
     54                                  RuleBasedCollator *coll,
     55                                  BreakIterator     *breakiter,
     56                                  UErrorCode        &status) :
     57                            SearchIterator(text, breakiter),
     58                            m_pattern_(pattern)
     59 {
     60     if (U_FAILURE(status)) {
     61         m_strsrch_ = NULL;
     62         return;
     63     }
     64     if (coll == NULL) {
     65         status     = U_ILLEGAL_ARGUMENT_ERROR;
     66         m_strsrch_ = NULL;
     67         return;
     68     }
     69     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
     70                                           m_pattern_.length(),
     71                                           m_text_.getBuffer(),
     72                                           m_text_.length(), coll->toUCollator(),
     73                                           (UBreakIterator *)breakiter,
     74                                           &status);
     75     uprv_free(m_search_);
     76     m_search_ = NULL;
     77 
     78     if (U_SUCCESS(status)) {
     79         // m_search_ has been created by the base SearchIterator class
     80         m_search_ = m_strsrch_->search;
     81     }
     82 }
     83 
     84 StringSearch::StringSearch(const UnicodeString     &pattern,
     85                                  CharacterIterator &text,
     86                            const Locale            &locale,
     87                                  BreakIterator     *breakiter,
     88                                  UErrorCode        &status) :
     89                            SearchIterator(text, breakiter),
     90                            m_pattern_(pattern)
     91 {
     92     if (U_FAILURE(status)) {
     93         m_strsrch_ = NULL;
     94         return;
     95     }
     96     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     97                               m_text_.getBuffer(), m_text_.length(),
     98                               locale.getName(), (UBreakIterator *)breakiter,
     99                               &status);
    100     uprv_free(m_search_);
    101     m_search_ = NULL;
    102 
    103     if (U_SUCCESS(status)) {
    104         // m_search_ has been created by the base SearchIterator class
    105         m_search_ = m_strsrch_->search;
    106     }
    107 }
    108 
    109 StringSearch::StringSearch(const UnicodeString     &pattern,
    110                                  CharacterIterator &text,
    111                                  RuleBasedCollator *coll,
    112                                  BreakIterator     *breakiter,
    113                                  UErrorCode        &status) :
    114                            SearchIterator(text, breakiter),
    115                            m_pattern_(pattern)
    116 {
    117     if (U_FAILURE(status)) {
    118         m_strsrch_ = NULL;
    119         return;
    120     }
    121     if (coll == NULL) {
    122         status     = U_ILLEGAL_ARGUMENT_ERROR;
    123         m_strsrch_ = NULL;
    124         return;
    125     }
    126     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    127                                           m_pattern_.length(),
    128                                           m_text_.getBuffer(),
    129                                           m_text_.length(), coll->toUCollator(),
    130                                           (UBreakIterator *)breakiter,
    131                                           &status);
    132     uprv_free(m_search_);
    133     m_search_ = NULL;
    134 
    135     if (U_SUCCESS(status)) {
    136         // m_search_ has been created by the base SearchIterator class
    137         m_search_ = m_strsrch_->search;
    138     }
    139 }
    140 
    141 StringSearch::StringSearch(const StringSearch &that) :
    142                        SearchIterator(that.m_text_, that.m_breakiterator_),
    143                        m_pattern_(that.m_pattern_)
    144 {
    145     UErrorCode status = U_ZERO_ERROR;
    146 
    147     // Free m_search_ from the superclass
    148     uprv_free(m_search_);
    149     m_search_ = NULL;
    150 
    151     if (that.m_strsrch_ == NULL) {
    152         // This was not a good copy
    153         m_strsrch_ = NULL;
    154     }
    155     else {
    156         // Make a deep copy
    157         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    158                                               m_pattern_.length(),
    159                                               m_text_.getBuffer(),
    160                                               m_text_.length(),
    161                                               that.m_strsrch_->collator,
    162                                              (UBreakIterator *)that.m_breakiterator_,
    163                                               &status);
    164         if (U_SUCCESS(status)) {
    165             // m_search_ has been created by the base SearchIterator class
    166             m_search_        = m_strsrch_->search;
    167         }
    168     }
    169 }
    170 
    171 StringSearch::~StringSearch()
    172 {
    173     if (m_strsrch_ != NULL) {
    174         usearch_close(m_strsrch_);
    175         m_search_ = NULL;
    176     }
    177 }
    178 
    179 StringSearch *
    180 StringSearch::clone() const {
    181     return new StringSearch(*this);
    182 }
    183 
    184 // operator overloading ---------------------------------------------
    185 StringSearch & StringSearch::operator=(const StringSearch &that)
    186 {
    187     if ((*this) != that) {
    188         UErrorCode status = U_ZERO_ERROR;
    189         m_text_          = that.m_text_;
    190         m_breakiterator_ = that.m_breakiterator_;
    191         m_pattern_       = that.m_pattern_;
    192         // all m_search_ in the parent class is linked up with m_strsrch_
    193         usearch_close(m_strsrch_);
    194         m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    195                                               m_pattern_.length(),
    196                                               m_text_.getBuffer(),
    197                                               m_text_.length(),
    198                                               that.m_strsrch_->collator,
    199                                               NULL, &status);
    200         // Check null pointer
    201         if (m_strsrch_ != NULL) {
    202             m_search_ = m_strsrch_->search;
    203         }
    204     }
    205     return *this;
    206 }
    207 
    208 UBool StringSearch::operator==(const SearchIterator &that) const
    209 {
    210     if (this == &that) {
    211         return TRUE;
    212     }
    213     if (SearchIterator::operator ==(that)) {
    214         StringSearch &thatsrch = (StringSearch &)that;
    215         return (this->m_pattern_ == thatsrch.m_pattern_ &&
    216                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
    217     }
    218     return FALSE;
    219 }
    220 
    221 // public get and set methods ----------------------------------------
    222 
    223 void StringSearch::setOffset(int32_t position, UErrorCode &status)
    224 {
    225     // status checked in usearch_setOffset
    226     usearch_setOffset(m_strsrch_, position, &status);
    227 }
    228 
    229 int32_t StringSearch::getOffset(void) const
    230 {
    231     return usearch_getOffset(m_strsrch_);
    232 }
    233 
    234 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
    235 {
    236     if (U_SUCCESS(status)) {
    237         m_text_ = text;
    238         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
    239     }
    240 }
    241 
    242 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
    243 {
    244     if (U_SUCCESS(status)) {
    245         text.getText(m_text_);
    246         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
    247     }
    248 }
    249 
    250 RuleBasedCollator * StringSearch::getCollator() const
    251 {
    252     // Note the const_cast. It would be cleaner if this const method returned a const collator.
    253     return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
    254 }
    255 
    256 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
    257 {
    258     if (U_SUCCESS(status)) {
    259         usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
    260     }
    261 }
    262 
    263 void StringSearch::setPattern(const UnicodeString &pattern,
    264                                     UErrorCode    &status)
    265 {
    266     if (U_SUCCESS(status)) {
    267         m_pattern_ = pattern;
    268         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
    269                            &status);
    270     }
    271 }
    272 
    273 const UnicodeString & StringSearch::getPattern() const
    274 {
    275     return m_pattern_;
    276 }
    277 
    278 // public methods ----------------------------------------------------
    279 
    280 void StringSearch::reset()
    281 {
    282     usearch_reset(m_strsrch_);
    283 }
    284 
    285 SearchIterator * StringSearch::safeClone(void) const
    286 {
    287     UErrorCode status = U_ZERO_ERROR;
    288     StringSearch *result = new StringSearch(m_pattern_, m_text_,
    289                                             getCollator(),
    290                                             m_breakiterator_,
    291                                             status);
    292     /* test for NULL */
    293     if (result == 0) {
    294         status = U_MEMORY_ALLOCATION_ERROR;
    295         return 0;
    296     }
    297     result->setOffset(getOffset(), status);
    298     result->setMatchStart(m_strsrch_->search->matchedIndex);
    299     result->setMatchLength(m_strsrch_->search->matchedLength);
    300     if (U_FAILURE(status)) {
    301         return NULL;
    302     }
    303     return result;
    304 }
    305 
    306 // protected method -------------------------------------------------
    307 
    308 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
    309 {
    310     // values passed here are already in the pre-shift position
    311     if (U_SUCCESS(status)) {
    312         if (m_strsrch_->pattern.cesLength == 0) {
    313             m_search_->matchedIndex =
    314                                     m_search_->matchedIndex == USEARCH_DONE ?
    315                                     getOffset() : m_search_->matchedIndex + 1;
    316             m_search_->matchedLength = 0;
    317             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    318                            &status);
    319             if (m_search_->matchedIndex == m_search_->textLength) {
    320                 m_search_->matchedIndex = USEARCH_DONE;
    321             }
    322         }
    323         else {
    324             // looking at usearch.cpp, this part is shifted out to
    325             // StringSearch instead of SearchIterator because m_strsrch_ is
    326             // not accessible in SearchIterator
    327 #if 0
    328             if (position + m_strsrch_->pattern.defaultShiftSize
    329                 > m_search_->textLength) {
    330                 setMatchNotFound();
    331                 return USEARCH_DONE;
    332             }
    333 #endif
    334             if (m_search_->matchedLength <= 0) {
    335                 // the flipping direction issue has already been handled
    336                 // in next()
    337                 // for boundary check purposes. this will ensure that the
    338                 // next match will not preceed the current offset
    339                 // note search->matchedIndex will always be set to something
    340                 // in the code
    341                 m_search_->matchedIndex = position - 1;
    342             }
    343 
    344             ucol_setOffset(m_strsrch_->textIter, position, &status);
    345 
    346 #if 0
    347             for (;;) {
    348                 if (m_search_->isCanonicalMatch) {
    349                     // can't use exact here since extra accents are allowed.
    350                     usearch_handleNextCanonical(m_strsrch_, &status);
    351                 }
    352                 else {
    353                     usearch_handleNextExact(m_strsrch_, &status);
    354                 }
    355                 if (U_FAILURE(status)) {
    356                     return USEARCH_DONE;
    357                 }
    358                 if (m_breakiterator_ == NULL
    359 #if !UCONFIG_NO_BREAK_ITERATION
    360                     ||
    361                     m_search_->matchedIndex == USEARCH_DONE ||
    362                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    363                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
    364                                                   m_search_->matchedLength))
    365 #endif
    366                 ) {
    367                     if (m_search_->matchedIndex == USEARCH_DONE) {
    368                         ucol_setOffset(m_strsrch_->textIter,
    369                                        m_search_->textLength, &status);
    370                     }
    371                     else {
    372                         ucol_setOffset(m_strsrch_->textIter,
    373                                        m_search_->matchedIndex, &status);
    374                     }
    375                     return m_search_->matchedIndex;
    376                 }
    377             }
    378 #else
    379             // if m_strsrch_->breakIter is always the same as m_breakiterator_
    380             // then we don't need to check the match boundaries here because
    381             // usearch_handleNextXXX will already have done it.
    382             if (m_search_->isCanonicalMatch) {
    383             	// *could* actually use exact here 'cause no extra accents allowed...
    384             	usearch_handleNextCanonical(m_strsrch_, &status);
    385             } else {
    386             	usearch_handleNextExact(m_strsrch_, &status);
    387             }
    388 
    389             if (U_FAILURE(status)) {
    390             	return USEARCH_DONE;
    391             }
    392 
    393             if (m_search_->matchedIndex == USEARCH_DONE) {
    394             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
    395             } else {
    396             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
    397             }
    398 
    399             return m_search_->matchedIndex;
    400 #endif
    401         }
    402     }
    403     return USEARCH_DONE;
    404 }
    405 
    406 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
    407 {
    408     // values passed here are already in the pre-shift position
    409     if (U_SUCCESS(status)) {
    410         if (m_strsrch_->pattern.cesLength == 0) {
    411             m_search_->matchedIndex =
    412                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
    413                    m_search_->matchedIndex);
    414             if (m_search_->matchedIndex == 0) {
    415                 setMatchNotFound();
    416             }
    417             else {
    418                 m_search_->matchedIndex --;
    419                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    420                                &status);
    421                 m_search_->matchedLength = 0;
    422             }
    423         }
    424         else {
    425             // looking at usearch.cpp, this part is shifted out to
    426             // StringSearch instead of SearchIterator because m_strsrch_ is
    427             // not accessible in SearchIterator
    428 #if 0
    429             if (!m_search_->isOverlap &&
    430                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
    431                 setMatchNotFound();
    432                 return USEARCH_DONE;
    433             }
    434 
    435             for (;;) {
    436                 if (m_search_->isCanonicalMatch) {
    437                     // can't use exact here since extra accents are allowed.
    438                     usearch_handlePreviousCanonical(m_strsrch_, &status);
    439                 }
    440                 else {
    441                     usearch_handlePreviousExact(m_strsrch_, &status);
    442                 }
    443                 if (U_FAILURE(status)) {
    444                     return USEARCH_DONE;
    445                 }
    446                 if (m_breakiterator_ == NULL
    447 #if !UCONFIG_NO_BREAK_ITERATION
    448                     ||
    449                     m_search_->matchedIndex == USEARCH_DONE ||
    450                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    451                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
    452                                                   m_search_->matchedLength))
    453 #endif
    454                 ) {
    455                     return m_search_->matchedIndex;
    456                 }
    457             }
    458 #else
    459             ucol_setOffset(m_strsrch_->textIter, position, &status);
    460 
    461             if (m_search_->isCanonicalMatch) {
    462             	// *could* use exact match here since extra accents *not* allowed!
    463             	usearch_handlePreviousCanonical(m_strsrch_, &status);
    464             } else {
    465             	usearch_handlePreviousExact(m_strsrch_, &status);
    466             }
    467 
    468             if (U_FAILURE(status)) {
    469             	return USEARCH_DONE;
    470             }
    471 
    472             return m_search_->matchedIndex;
    473 #endif
    474         }
    475 
    476         return m_search_->matchedIndex;
    477     }
    478     return USEARCH_DONE;
    479 }
    480 
    481 U_NAMESPACE_END
    482 
    483 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
    484