Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
      6 **********************************************************************
      7 *   Date        Name        Description
      8 *  03/22/2000   helena      Creation.
      9 **********************************************************************
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     15 
     16 #include "unicode/brkiter.h"
     17 #include "unicode/schriter.h"
     18 #include "unicode/search.h"
     19 #include "usrchimp.h"
     20 #include "cmemory.h"
     21 
     22 // public constructors and destructors -----------------------------------
     23 U_NAMESPACE_BEGIN
     24 
     25 SearchIterator::SearchIterator(const SearchIterator &other)
     26     : UObject(other)
     27 {
     28     m_breakiterator_            = other.m_breakiterator_;
     29     m_text_                     = other.m_text_;
     30     m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
     31     m_search_->breakIter        = other.m_search_->breakIter;
     32     m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
     33     m_search_->isOverlap        = other.m_search_->isOverlap;
     34     m_search_->elementComparisonType = other.m_search_->elementComparisonType;
     35     m_search_->matchedIndex     = other.m_search_->matchedIndex;
     36     m_search_->matchedLength    = other.m_search_->matchedLength;
     37     m_search_->text             = other.m_search_->text;
     38     m_search_->textLength       = other.m_search_->textLength;
     39 }
     40 
     41 SearchIterator::~SearchIterator()
     42 {
     43     if (m_search_ != NULL) {
     44         uprv_free(m_search_);
     45     }
     46 }
     47 
     48 // public get and set methods ----------------------------------------
     49 
     50 void SearchIterator::setAttribute(USearchAttribute       attribute,
     51                                   USearchAttributeValue  value,
     52                                   UErrorCode            &status)
     53 {
     54     if (U_SUCCESS(status)) {
     55         switch (attribute)
     56         {
     57         case USEARCH_OVERLAP :
     58             m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
     59             break;
     60         case USEARCH_CANONICAL_MATCH :
     61             m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
     62             break;
     63         case USEARCH_ELEMENT_COMPARISON :
     64             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
     65                 m_search_->elementComparisonType = (int16_t)value;
     66             } else {
     67                 m_search_->elementComparisonType = 0;
     68             }
     69             break;
     70         default:
     71             status = U_ILLEGAL_ARGUMENT_ERROR;
     72         }
     73     }
     74     if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
     75         status = U_ILLEGAL_ARGUMENT_ERROR;
     76     }
     77 }
     78 
     79 USearchAttributeValue SearchIterator::getAttribute(
     80                                           USearchAttribute  attribute) const
     81 {
     82     switch (attribute) {
     83     case USEARCH_OVERLAP :
     84         return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
     85     case USEARCH_CANONICAL_MATCH :
     86         return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
     87                                                                 USEARCH_OFF);
     88     case USEARCH_ELEMENT_COMPARISON :
     89         {
     90             int16_t value = m_search_->elementComparisonType;
     91             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
     92                 return (USearchAttributeValue)value;
     93             } else {
     94                 return USEARCH_STANDARD_ELEMENT_COMPARISON;
     95             }
     96         }
     97     default :
     98         return USEARCH_DEFAULT;
     99     }
    100 }
    101 
    102 int32_t SearchIterator::getMatchedStart() const
    103 {
    104     return m_search_->matchedIndex;
    105 }
    106 
    107 int32_t SearchIterator::getMatchedLength() const
    108 {
    109     return m_search_->matchedLength;
    110 }
    111 
    112 void SearchIterator::getMatchedText(UnicodeString &result) const
    113 {
    114     int32_t matchedindex  = m_search_->matchedIndex;
    115     int32_t     matchedlength = m_search_->matchedLength;
    116     if (matchedindex != USEARCH_DONE && matchedlength != 0) {
    117         result.setTo(m_search_->text + matchedindex, matchedlength);
    118     }
    119     else {
    120         result.remove();
    121     }
    122 }
    123 
    124 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
    125                                       UErrorCode &status)
    126 {
    127     if (U_SUCCESS(status)) {
    128 #if 0
    129         m_search_->breakIter = NULL;
    130         // the c++ breakiterator may not make use of ubreakiterator.
    131         // so we'll have to keep track of it ourselves.
    132 #else
    133         // Well, gee... the Constructors that take a BreakIterator
    134         // all cast the BreakIterator to a UBreakIterator and
    135         // pass it to the corresponding usearch_openFromXXX
    136         // routine, so there's no reason not to do this.
    137         //
    138         // Besides, a UBreakIterator is a BreakIterator, so
    139         // any subclass of BreakIterator should work fine here...
    140         m_search_->breakIter = (UBreakIterator *) breakiter;
    141 #endif
    142 
    143         m_breakiterator_ = breakiter;
    144     }
    145 }
    146 
    147 const BreakIterator * SearchIterator::getBreakIterator(void) const
    148 {
    149     return m_breakiterator_;
    150 }
    151 
    152 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
    153 {
    154     if (U_SUCCESS(status)) {
    155         if (text.length() == 0) {
    156             status = U_ILLEGAL_ARGUMENT_ERROR;
    157         }
    158         else {
    159             m_text_        = text;
    160             m_search_->text = m_text_.getBuffer();
    161             m_search_->textLength = m_text_.length();
    162         }
    163     }
    164 }
    165 
    166 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
    167 {
    168     if (U_SUCCESS(status)) {
    169         text.getText(m_text_);
    170         setText(m_text_, status);
    171     }
    172 }
    173 
    174 const UnicodeString & SearchIterator::getText(void) const
    175 {
    176     return m_text_;
    177 }
    178 
    179 // operator overloading ----------------------------------------------
    180 
    181 UBool SearchIterator::operator==(const SearchIterator &that) const
    182 {
    183     if (this == &that) {
    184         return TRUE;
    185     }
    186     return (m_breakiterator_            == that.m_breakiterator_ &&
    187             m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
    188             m_search_->isOverlap        == that.m_search_->isOverlap &&
    189             m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
    190             m_search_->matchedIndex     == that.m_search_->matchedIndex &&
    191             m_search_->matchedLength    == that.m_search_->matchedLength &&
    192             m_search_->textLength       == that.m_search_->textLength &&
    193             getOffset() == that.getOffset() &&
    194             (uprv_memcmp(m_search_->text, that.m_search_->text,
    195                               m_search_->textLength * sizeof(UChar)) == 0));
    196 }
    197 
    198 // public methods ----------------------------------------------------
    199 
    200 int32_t SearchIterator::first(UErrorCode &status)
    201 {
    202     if (U_FAILURE(status)) {
    203         return USEARCH_DONE;
    204     }
    205     setOffset(0, status);
    206     return handleNext(0, status);
    207 }
    208 
    209 int32_t SearchIterator::following(int32_t position,
    210                                       UErrorCode &status)
    211 {
    212     if (U_FAILURE(status)) {
    213         return USEARCH_DONE;
    214     }
    215     setOffset(position, status);
    216     return handleNext(position, status);
    217 }
    218 
    219 int32_t SearchIterator::last(UErrorCode &status)
    220 {
    221     if (U_FAILURE(status)) {
    222         return USEARCH_DONE;
    223     }
    224     setOffset(m_search_->textLength, status);
    225     return handlePrev(m_search_->textLength, status);
    226 }
    227 
    228 int32_t SearchIterator::preceding(int32_t position,
    229                                       UErrorCode &status)
    230 {
    231     if (U_FAILURE(status)) {
    232         return USEARCH_DONE;
    233     }
    234     setOffset(position, status);
    235     return handlePrev(position, status);
    236 }
    237 
    238 int32_t SearchIterator::next(UErrorCode &status)
    239 {
    240     if (U_SUCCESS(status)) {
    241         int32_t offset = getOffset();
    242         int32_t matchindex  = m_search_->matchedIndex;
    243         int32_t     matchlength = m_search_->matchedLength;
    244         m_search_->reset = FALSE;
    245         if (m_search_->isForwardSearching == TRUE) {
    246             int32_t textlength = m_search_->textLength;
    247             if (offset == textlength || matchindex == textlength ||
    248                 (matchindex != USEARCH_DONE &&
    249                 matchindex + matchlength >= textlength)) {
    250                 // not enough characters to match
    251                 setMatchNotFound();
    252                 return USEARCH_DONE;
    253             }
    254         }
    255         else {
    256             // switching direction.
    257             // if matchedIndex == USEARCH_DONE, it means that either a
    258             // setOffset has been called or that previous ran off the text
    259             // string. the iterator would have been set to offset 0 if a
    260             // match is not found.
    261             m_search_->isForwardSearching = TRUE;
    262             if (m_search_->matchedIndex != USEARCH_DONE) {
    263                 // there's no need to set the collation element iterator
    264                 // the next call to next will set the offset.
    265                 return matchindex;
    266             }
    267         }
    268 
    269         if (matchlength > 0) {
    270             // if matchlength is 0 we are at the start of the iteration
    271             if (m_search_->isOverlap) {
    272                 offset ++;
    273             }
    274             else {
    275                 offset += matchlength;
    276             }
    277         }
    278         return handleNext(offset, status);
    279     }
    280     return USEARCH_DONE;
    281 }
    282 
    283 int32_t SearchIterator::previous(UErrorCode &status)
    284 {
    285     if (U_SUCCESS(status)) {
    286         int32_t offset;
    287         if (m_search_->reset) {
    288             offset                       = m_search_->textLength;
    289             m_search_->isForwardSearching = FALSE;
    290             m_search_->reset              = FALSE;
    291             setOffset(offset, status);
    292         }
    293         else {
    294             offset = getOffset();
    295         }
    296 
    297         int32_t matchindex = m_search_->matchedIndex;
    298         if (m_search_->isForwardSearching == TRUE) {
    299             // switching direction.
    300             // if matchedIndex == USEARCH_DONE, it means that either a
    301             // setOffset has been called or that next ran off the text
    302             // string. the iterator would have been set to offset textLength if
    303             // a match is not found.
    304             m_search_->isForwardSearching = FALSE;
    305             if (matchindex != USEARCH_DONE) {
    306                 return matchindex;
    307             }
    308         }
    309         else {
    310             if (offset == 0 || matchindex == 0) {
    311                 // not enough characters to match
    312                 setMatchNotFound();
    313                 return USEARCH_DONE;
    314             }
    315         }
    316 
    317         if (matchindex != USEARCH_DONE) {
    318             if (m_search_->isOverlap) {
    319                 matchindex += m_search_->matchedLength - 2;
    320             }
    321 
    322             return handlePrev(matchindex, status);
    323         }
    324 
    325         return handlePrev(offset, status);
    326     }
    327 
    328     return USEARCH_DONE;
    329 }
    330 
    331 void SearchIterator::reset()
    332 {
    333     UErrorCode status = U_ZERO_ERROR;
    334     setMatchNotFound();
    335     setOffset(0, status);
    336     m_search_->isOverlap          = FALSE;
    337     m_search_->isCanonicalMatch   = FALSE;
    338     m_search_->elementComparisonType = 0;
    339     m_search_->isForwardSearching = TRUE;
    340     m_search_->reset              = TRUE;
    341 }
    342 
    343 // protected constructors and destructors -----------------------------
    344 
    345 SearchIterator::SearchIterator()
    346 {
    347     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    348     m_search_->breakIter          = NULL;
    349     m_search_->isOverlap          = FALSE;
    350     m_search_->isCanonicalMatch   = FALSE;
    351     m_search_->elementComparisonType = 0;
    352     m_search_->isForwardSearching = TRUE;
    353     m_search_->reset              = TRUE;
    354     m_search_->matchedIndex       = USEARCH_DONE;
    355     m_search_->matchedLength      = 0;
    356     m_search_->text               = NULL;
    357     m_search_->textLength         = 0;
    358     m_breakiterator_              = NULL;
    359 }
    360 
    361 SearchIterator::SearchIterator(const UnicodeString &text,
    362                                      BreakIterator *breakiter) :
    363                                      m_breakiterator_(breakiter),
    364                                      m_text_(text)
    365 {
    366     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    367     m_search_->breakIter          = NULL;
    368     m_search_->isOverlap          = FALSE;
    369     m_search_->isCanonicalMatch   = FALSE;
    370     m_search_->elementComparisonType = 0;
    371     m_search_->isForwardSearching = TRUE;
    372     m_search_->reset              = TRUE;
    373     m_search_->matchedIndex       = USEARCH_DONE;
    374     m_search_->matchedLength      = 0;
    375     m_search_->text               = m_text_.getBuffer();
    376     m_search_->textLength         = text.length();
    377 }
    378 
    379 SearchIterator::SearchIterator(CharacterIterator &text,
    380                                BreakIterator     *breakiter) :
    381                                m_breakiterator_(breakiter)
    382 {
    383     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    384     m_search_->breakIter          = NULL;
    385     m_search_->isOverlap          = FALSE;
    386     m_search_->isCanonicalMatch   = FALSE;
    387     m_search_->elementComparisonType = 0;
    388     m_search_->isForwardSearching = TRUE;
    389     m_search_->reset              = TRUE;
    390     m_search_->matchedIndex       = USEARCH_DONE;
    391     m_search_->matchedLength      = 0;
    392     text.getText(m_text_);
    393     m_search_->text               = m_text_.getBuffer();
    394     m_search_->textLength         = m_text_.length();
    395     m_breakiterator_             = breakiter;
    396 }
    397 
    398 // protected methods ------------------------------------------------------
    399 
    400 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
    401 {
    402     if (this != &that) {
    403         m_breakiterator_            = that.m_breakiterator_;
    404         m_text_                     = that.m_text_;
    405         m_search_->breakIter        = that.m_search_->breakIter;
    406         m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
    407         m_search_->isOverlap        = that.m_search_->isOverlap;
    408         m_search_->elementComparisonType = that.m_search_->elementComparisonType;
    409         m_search_->matchedIndex     = that.m_search_->matchedIndex;
    410         m_search_->matchedLength    = that.m_search_->matchedLength;
    411         m_search_->text             = that.m_search_->text;
    412         m_search_->textLength       = that.m_search_->textLength;
    413     }
    414     return *this;
    415 }
    416 
    417 void SearchIterator::setMatchLength(int32_t length)
    418 {
    419     m_search_->matchedLength = length;
    420 }
    421 
    422 void SearchIterator::setMatchStart(int32_t position)
    423 {
    424     m_search_->matchedIndex = position;
    425 }
    426 
    427 void SearchIterator::setMatchNotFound()
    428 {
    429     setMatchStart(USEARCH_DONE);
    430     setMatchLength(0);
    431     UErrorCode status = U_ZERO_ERROR;
    432     // by default no errors should be returned here since offsets are within
    433     // range.
    434     if (m_search_->isForwardSearching) {
    435         setOffset(m_search_->textLength, status);
    436     }
    437     else {
    438         setOffset(0, status);
    439     }
    440 }
    441 
    442 
    443 U_NAMESPACE_END
    444 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
    446