Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
      4 **********************************************************************
      5 *   Date        Name        Description
      6 *  03/22/2000   helena      Creation.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/brkiter.h"
     15 #include "unicode/schriter.h"
     16 #include "unicode/search.h"
     17 #include "usrchimp.h"
     18 #include "cmemory.h"
     19 
     20 // public constructors and destructors -----------------------------------
     21 U_NAMESPACE_BEGIN
     22 
     23 SearchIterator::SearchIterator(const SearchIterator &other)
     24     : UObject(other)
     25 {
     26     m_breakiterator_            = other.m_breakiterator_;
     27     m_text_                     = other.m_text_;
     28     m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
     29     m_search_->breakIter        = other.m_search_->breakIter;
     30     m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
     31     m_search_->isOverlap        = other.m_search_->isOverlap;
     32     m_search_->elementComparisonType = other.m_search_->elementComparisonType;
     33     m_search_->matchedIndex     = other.m_search_->matchedIndex;
     34     m_search_->matchedLength    = other.m_search_->matchedLength;
     35     m_search_->text             = other.m_search_->text;
     36     m_search_->textLength       = other.m_search_->textLength;
     37 }
     38 
     39 SearchIterator::~SearchIterator()
     40 {
     41     if (m_search_ != NULL) {
     42         uprv_free(m_search_);
     43     }
     44 }
     45 
     46 // public get and set methods ----------------------------------------
     47 
     48 void SearchIterator::setAttribute(USearchAttribute       attribute,
     49                                   USearchAttributeValue  value,
     50                                   UErrorCode            &status)
     51 {
     52     if (U_SUCCESS(status)) {
     53         switch (attribute)
     54         {
     55         case USEARCH_OVERLAP :
     56             m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
     57             break;
     58         case USEARCH_CANONICAL_MATCH :
     59             m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
     60             break;
     61         case USEARCH_ELEMENT_COMPARISON :
     62             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
     63                 m_search_->elementComparisonType = (int16_t)value;
     64             } else {
     65                 m_search_->elementComparisonType = 0;
     66             }
     67             break;
     68         default:
     69             status = U_ILLEGAL_ARGUMENT_ERROR;
     70         }
     71     }
     72     if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
     73         status = U_ILLEGAL_ARGUMENT_ERROR;
     74     }
     75 }
     76 
     77 USearchAttributeValue SearchIterator::getAttribute(
     78                                           USearchAttribute  attribute) const
     79 {
     80     switch (attribute) {
     81     case USEARCH_OVERLAP :
     82         return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
     83     case USEARCH_CANONICAL_MATCH :
     84         return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
     85                                                                 USEARCH_OFF);
     86     case USEARCH_ELEMENT_COMPARISON :
     87         {
     88             int16_t value = m_search_->elementComparisonType;
     89             if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
     90                 return (USearchAttributeValue)value;
     91             } else {
     92                 return USEARCH_STANDARD_ELEMENT_COMPARISON;
     93             }
     94         }
     95     default :
     96         return USEARCH_DEFAULT;
     97     }
     98 }
     99 
    100 int32_t SearchIterator::getMatchedStart() const
    101 {
    102     return m_search_->matchedIndex;
    103 }
    104 
    105 int32_t SearchIterator::getMatchedLength() const
    106 {
    107     return m_search_->matchedLength;
    108 }
    109 
    110 void SearchIterator::getMatchedText(UnicodeString &result) const
    111 {
    112     int32_t matchedindex  = m_search_->matchedIndex;
    113     int32_t     matchedlength = m_search_->matchedLength;
    114     if (matchedindex != USEARCH_DONE && matchedlength != 0) {
    115         result.setTo(m_search_->text + matchedindex, matchedlength);
    116     }
    117     else {
    118         result.remove();
    119     }
    120 }
    121 
    122 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
    123                                       UErrorCode &status)
    124 {
    125     if (U_SUCCESS(status)) {
    126 #if 0
    127         m_search_->breakIter = NULL;
    128         // the c++ breakiterator may not make use of ubreakiterator.
    129         // so we'll have to keep track of it ourselves.
    130 #else
    131         // Well, gee... the Constructors that take a BreakIterator
    132         // all cast the BreakIterator to a UBreakIterator and
    133         // pass it to the corresponding usearch_openFromXXX
    134         // routine, so there's no reason not to do this.
    135         //
    136         // Besides, a UBreakIterator is a BreakIterator, so
    137         // any subclass of BreakIterator should work fine here...
    138         m_search_->breakIter = (UBreakIterator *) breakiter;
    139 #endif
    140 
    141         m_breakiterator_ = breakiter;
    142     }
    143 }
    144 
    145 const BreakIterator * SearchIterator::getBreakIterator(void) const
    146 {
    147     return m_breakiterator_;
    148 }
    149 
    150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
    151 {
    152     if (U_SUCCESS(status)) {
    153         if (text.length() == 0) {
    154             status = U_ILLEGAL_ARGUMENT_ERROR;
    155         }
    156         else {
    157             m_text_        = text;
    158             m_search_->text = m_text_.getBuffer();
    159             m_search_->textLength = m_text_.length();
    160         }
    161     }
    162 }
    163 
    164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
    165 {
    166     if (U_SUCCESS(status)) {
    167         text.getText(m_text_);
    168         setText(m_text_, status);
    169     }
    170 }
    171 
    172 const UnicodeString & SearchIterator::getText(void) const
    173 {
    174     return m_text_;
    175 }
    176 
    177 // operator overloading ----------------------------------------------
    178 
    179 UBool SearchIterator::operator==(const SearchIterator &that) const
    180 {
    181     if (this == &that) {
    182         return TRUE;
    183     }
    184     return (m_breakiterator_            == that.m_breakiterator_ &&
    185             m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
    186             m_search_->isOverlap        == that.m_search_->isOverlap &&
    187             m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
    188             m_search_->matchedIndex     == that.m_search_->matchedIndex &&
    189             m_search_->matchedLength    == that.m_search_->matchedLength &&
    190             m_search_->textLength       == that.m_search_->textLength &&
    191             getOffset() == that.getOffset() &&
    192             (uprv_memcmp(m_search_->text, that.m_search_->text,
    193                               m_search_->textLength * sizeof(UChar)) == 0));
    194 }
    195 
    196 // public methods ----------------------------------------------------
    197 
    198 int32_t SearchIterator::first(UErrorCode &status)
    199 {
    200     if (U_FAILURE(status)) {
    201         return USEARCH_DONE;
    202     }
    203     setOffset(0, status);
    204     return handleNext(0, status);
    205 }
    206 
    207 int32_t SearchIterator::following(int32_t position,
    208                                       UErrorCode &status)
    209 {
    210     if (U_FAILURE(status)) {
    211         return USEARCH_DONE;
    212     }
    213     setOffset(position, status);
    214     return handleNext(position, status);
    215 }
    216 
    217 int32_t SearchIterator::last(UErrorCode &status)
    218 {
    219     if (U_FAILURE(status)) {
    220         return USEARCH_DONE;
    221     }
    222     setOffset(m_search_->textLength, status);
    223     return handlePrev(m_search_->textLength, status);
    224 }
    225 
    226 int32_t SearchIterator::preceding(int32_t position,
    227                                       UErrorCode &status)
    228 {
    229     if (U_FAILURE(status)) {
    230         return USEARCH_DONE;
    231     }
    232     setOffset(position, status);
    233     return handlePrev(position, status);
    234 }
    235 
    236 int32_t SearchIterator::next(UErrorCode &status)
    237 {
    238     if (U_SUCCESS(status)) {
    239         int32_t offset = getOffset();
    240         int32_t matchindex  = m_search_->matchedIndex;
    241         int32_t     matchlength = m_search_->matchedLength;
    242         m_search_->reset = FALSE;
    243         if (m_search_->isForwardSearching == TRUE) {
    244             int32_t textlength = m_search_->textLength;
    245             if (offset == textlength || matchindex == textlength ||
    246                 (matchindex != USEARCH_DONE &&
    247                 matchindex + matchlength >= textlength)) {
    248                 // not enough characters to match
    249                 setMatchNotFound();
    250                 return USEARCH_DONE;
    251             }
    252         }
    253         else {
    254             // switching direction.
    255             // if matchedIndex == USEARCH_DONE, it means that either a
    256             // setOffset has been called or that previous ran off the text
    257             // string. the iterator would have been set to offset 0 if a
    258             // match is not found.
    259             m_search_->isForwardSearching = TRUE;
    260             if (m_search_->matchedIndex != USEARCH_DONE) {
    261                 // there's no need to set the collation element iterator
    262                 // the next call to next will set the offset.
    263                 return matchindex;
    264             }
    265         }
    266 
    267         if (matchlength > 0) {
    268             // if matchlength is 0 we are at the start of the iteration
    269             if (m_search_->isOverlap) {
    270                 offset ++;
    271             }
    272             else {
    273                 offset += matchlength;
    274             }
    275         }
    276         return handleNext(offset, status);
    277     }
    278     return USEARCH_DONE;
    279 }
    280 
    281 int32_t SearchIterator::previous(UErrorCode &status)
    282 {
    283     if (U_SUCCESS(status)) {
    284         int32_t offset;
    285         if (m_search_->reset) {
    286             offset                       = m_search_->textLength;
    287             m_search_->isForwardSearching = FALSE;
    288             m_search_->reset              = FALSE;
    289             setOffset(offset, status);
    290         }
    291         else {
    292             offset = getOffset();
    293         }
    294 
    295         int32_t matchindex = m_search_->matchedIndex;
    296         if (m_search_->isForwardSearching == TRUE) {
    297             // switching direction.
    298             // if matchedIndex == USEARCH_DONE, it means that either a
    299             // setOffset has been called or that next ran off the text
    300             // string. the iterator would have been set to offset textLength if
    301             // a match is not found.
    302             m_search_->isForwardSearching = FALSE;
    303             if (matchindex != USEARCH_DONE) {
    304                 return matchindex;
    305             }
    306         }
    307         else {
    308             if (offset == 0 || matchindex == 0) {
    309                 // not enough characters to match
    310                 setMatchNotFound();
    311                 return USEARCH_DONE;
    312             }
    313         }
    314 
    315         if (matchindex != USEARCH_DONE) {
    316             if (m_search_->isOverlap) {
    317                 matchindex += m_search_->matchedLength - 2;
    318             }
    319 
    320             return handlePrev(matchindex, status);
    321         }
    322 
    323         return handlePrev(offset, status);
    324     }
    325 
    326     return USEARCH_DONE;
    327 }
    328 
    329 void SearchIterator::reset()
    330 {
    331     UErrorCode status = U_ZERO_ERROR;
    332     setMatchNotFound();
    333     setOffset(0, status);
    334     m_search_->isOverlap          = FALSE;
    335     m_search_->isCanonicalMatch   = FALSE;
    336     m_search_->elementComparisonType = 0;
    337     m_search_->isForwardSearching = TRUE;
    338     m_search_->reset              = TRUE;
    339 }
    340 
    341 // protected constructors and destructors -----------------------------
    342 
    343 SearchIterator::SearchIterator()
    344 {
    345     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    346     m_search_->breakIter          = NULL;
    347     m_search_->isOverlap          = FALSE;
    348     m_search_->isCanonicalMatch   = FALSE;
    349     m_search_->elementComparisonType = 0;
    350     m_search_->isForwardSearching = TRUE;
    351     m_search_->reset              = TRUE;
    352     m_search_->matchedIndex       = USEARCH_DONE;
    353     m_search_->matchedLength      = 0;
    354     m_search_->text               = NULL;
    355     m_search_->textLength         = 0;
    356     m_breakiterator_              = NULL;
    357 }
    358 
    359 SearchIterator::SearchIterator(const UnicodeString &text,
    360                                      BreakIterator *breakiter) :
    361                                      m_breakiterator_(breakiter),
    362                                      m_text_(text)
    363 {
    364     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    365     m_search_->breakIter          = NULL;
    366     m_search_->isOverlap          = FALSE;
    367     m_search_->isCanonicalMatch   = FALSE;
    368     m_search_->elementComparisonType = 0;
    369     m_search_->isForwardSearching = TRUE;
    370     m_search_->reset              = TRUE;
    371     m_search_->matchedIndex       = USEARCH_DONE;
    372     m_search_->matchedLength      = 0;
    373     m_search_->text               = m_text_.getBuffer();
    374     m_search_->textLength         = text.length();
    375 }
    376 
    377 SearchIterator::SearchIterator(CharacterIterator &text,
    378                                BreakIterator     *breakiter) :
    379                                m_breakiterator_(breakiter)
    380 {
    381     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    382     m_search_->breakIter          = NULL;
    383     m_search_->isOverlap          = FALSE;
    384     m_search_->isCanonicalMatch   = FALSE;
    385     m_search_->elementComparisonType = 0;
    386     m_search_->isForwardSearching = TRUE;
    387     m_search_->reset              = TRUE;
    388     m_search_->matchedIndex       = USEARCH_DONE;
    389     m_search_->matchedLength      = 0;
    390     text.getText(m_text_);
    391     m_search_->text               = m_text_.getBuffer();
    392     m_search_->textLength         = m_text_.length();
    393     m_breakiterator_             = breakiter;
    394 }
    395 
    396 // protected methods ------------------------------------------------------
    397 
    398 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
    399 {
    400     if (this != &that) {
    401         m_breakiterator_            = that.m_breakiterator_;
    402         m_text_                     = that.m_text_;
    403         m_search_->breakIter        = that.m_search_->breakIter;
    404         m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
    405         m_search_->isOverlap        = that.m_search_->isOverlap;
    406         m_search_->elementComparisonType = that.m_search_->elementComparisonType;
    407         m_search_->matchedIndex     = that.m_search_->matchedIndex;
    408         m_search_->matchedLength    = that.m_search_->matchedLength;
    409         m_search_->text             = that.m_search_->text;
    410         m_search_->textLength       = that.m_search_->textLength;
    411     }
    412     return *this;
    413 }
    414 
    415 void SearchIterator::setMatchLength(int32_t length)
    416 {
    417     m_search_->matchedLength = length;
    418 }
    419 
    420 void SearchIterator::setMatchStart(int32_t position)
    421 {
    422     m_search_->matchedIndex = position;
    423 }
    424 
    425 void SearchIterator::setMatchNotFound()
    426 {
    427     setMatchStart(USEARCH_DONE);
    428     setMatchLength(0);
    429     UErrorCode status = U_ZERO_ERROR;
    430     // by default no errors should be returned here since offsets are within
    431     // range.
    432     if (m_search_->isForwardSearching) {
    433         setOffset(m_search_->textLength, status);
    434     }
    435     else {
    436         setOffset(0, status);
    437     }
    438 }
    439 
    440 
    441 U_NAMESPACE_END
    442 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
    444