Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2001-2008 IBM and others. All rights reserved.
      4 **********************************************************************
      5 *   Date        Name        Description
      6 *  03/22/2000   helena      Creation.
      7 **********************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/brkiter.h"
     15 #include "unicode/schriter.h"
     16 #include "unicode/search.h"
     17 #include "usrchimp.h"
     18 #include "cmemory.h"
     19 
     20 // public constructors and destructors -----------------------------------
     21 U_NAMESPACE_BEGIN
     22 
     23 SearchIterator::SearchIterator(const SearchIterator &other)
     24     : UObject(other)
     25 {
     26     m_breakiterator_            = other.m_breakiterator_;
     27     m_text_                     = other.m_text_;
     28     m_search_                   = (USearch *)uprv_malloc(sizeof(USearch));
     29     m_search_->breakIter        = other.m_search_->breakIter;
     30     m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
     31     m_search_->isOverlap        = other.m_search_->isOverlap;
     32     m_search_->matchedIndex     = other.m_search_->matchedIndex;
     33     m_search_->matchedLength    = other.m_search_->matchedLength;
     34     m_search_->text             = other.m_search_->text;
     35     m_search_->textLength       = other.m_search_->textLength;
     36 }
     37 
     38 SearchIterator::~SearchIterator()
     39 {
     40     if (m_search_ != NULL) {
     41         uprv_free(m_search_);
     42     }
     43 }
     44 
     45 // public get and set methods ----------------------------------------
     46 
     47 void SearchIterator::setAttribute(USearchAttribute       attribute,
     48                                   USearchAttributeValue  value,
     49                                   UErrorCode            &status)
     50 {
     51     if (U_SUCCESS(status)) {
     52         switch (attribute)
     53         {
     54         case USEARCH_OVERLAP :
     55             m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
     56             break;
     57         case USEARCH_CANONICAL_MATCH :
     58             m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
     59             break;
     60         default:
     61             status = U_ILLEGAL_ARGUMENT_ERROR;
     62         }
     63     }
     64     if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
     65         status = U_ILLEGAL_ARGUMENT_ERROR;
     66     }
     67 }
     68 
     69 USearchAttributeValue SearchIterator::getAttribute(
     70                                           USearchAttribute  attribute) const
     71 {
     72     switch (attribute) {
     73     case USEARCH_OVERLAP :
     74         return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
     75     case USEARCH_CANONICAL_MATCH :
     76         return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON :
     77                                                                 USEARCH_OFF);
     78     default :
     79         return USEARCH_DEFAULT;
     80     }
     81 }
     82 
     83 int32_t SearchIterator::getMatchedStart() const
     84 {
     85     return m_search_->matchedIndex;
     86 }
     87 
     88 int32_t SearchIterator::getMatchedLength() const
     89 {
     90     return m_search_->matchedLength;
     91 }
     92 
     93 void SearchIterator::getMatchedText(UnicodeString &result) const
     94 {
     95     int32_t matchedindex  = m_search_->matchedIndex;
     96     int32_t     matchedlength = m_search_->matchedLength;
     97     if (matchedindex != USEARCH_DONE && matchedlength != 0) {
     98         result.setTo(m_search_->text + matchedindex, matchedlength);
     99     }
    100     else {
    101         result.remove();
    102     }
    103 }
    104 
    105 void SearchIterator::setBreakIterator(BreakIterator *breakiter,
    106                                       UErrorCode &status)
    107 {
    108     if (U_SUCCESS(status)) {
    109 #if 0
    110         m_search_->breakIter = NULL;
    111         // the c++ breakiterator may not make use of ubreakiterator.
    112         // so we'll have to keep track of it ourselves.
    113 #else
    114         // Well, gee... the Constructors that take a BreakIterator
    115         // all cast the BreakIterator to a UBreakIterator and
    116         // pass it to the corresponding usearch_openFromXXX
    117         // routine, so there's no reason not to do this.
    118         //
    119         // Besides, a UBreakIterator is a BreakIterator, so
    120         // any subclass of BreakIterator should work fine here...
    121         m_search_->breakIter = (UBreakIterator *) breakiter;
    122 #endif
    123 
    124         m_breakiterator_ = breakiter;
    125     }
    126 }
    127 
    128 const BreakIterator * SearchIterator::getBreakIterator(void) const
    129 {
    130     return m_breakiterator_;
    131 }
    132 
    133 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
    134 {
    135     if (U_SUCCESS(status)) {
    136         if (text.length() == 0) {
    137             status = U_ILLEGAL_ARGUMENT_ERROR;
    138         }
    139         else {
    140             m_text_        = text;
    141             m_search_->text = m_text_.getBuffer();
    142             m_search_->textLength = m_text_.length();
    143         }
    144     }
    145 }
    146 
    147 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
    148 {
    149     if (U_SUCCESS(status)) {
    150         text.getText(m_text_);
    151         setText(m_text_, status);
    152     }
    153 }
    154 
    155 const UnicodeString & SearchIterator::getText(void) const
    156 {
    157     return m_text_;
    158 }
    159 
    160 // operator overloading ----------------------------------------------
    161 
    162 UBool SearchIterator::operator==(const SearchIterator &that) const
    163 {
    164     if (this == &that) {
    165         return TRUE;
    166     }
    167     return (m_breakiterator_            == that.m_breakiterator_ &&
    168             m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
    169             m_search_->isOverlap        == that.m_search_->isOverlap &&
    170             m_search_->matchedIndex     == that.m_search_->matchedIndex &&
    171             m_search_->matchedLength    == that.m_search_->matchedLength &&
    172             m_search_->textLength       == that.m_search_->textLength &&
    173             getOffset() == that.getOffset() &&
    174             (uprv_memcmp(m_search_->text, that.m_search_->text,
    175                               m_search_->textLength * sizeof(UChar)) == 0));
    176 }
    177 
    178 // public methods ----------------------------------------------------
    179 
    180 int32_t SearchIterator::first(UErrorCode &status)
    181 {
    182     if (U_FAILURE(status)) {
    183         return USEARCH_DONE;
    184     }
    185     setOffset(0, status);
    186     return handleNext(0, status);
    187 }
    188 
    189 int32_t SearchIterator::following(int32_t position,
    190                                       UErrorCode &status)
    191 {
    192     if (U_FAILURE(status)) {
    193         return USEARCH_DONE;
    194     }
    195     setOffset(position, status);
    196     return handleNext(position, status);
    197 }
    198 
    199 int32_t SearchIterator::last(UErrorCode &status)
    200 {
    201     if (U_FAILURE(status)) {
    202         return USEARCH_DONE;
    203     }
    204     setOffset(m_search_->textLength, status);
    205     return handlePrev(m_search_->textLength, status);
    206 }
    207 
    208 int32_t SearchIterator::preceding(int32_t position,
    209                                       UErrorCode &status)
    210 {
    211     if (U_FAILURE(status)) {
    212         return USEARCH_DONE;
    213     }
    214     setOffset(position, status);
    215     return handlePrev(position, status);
    216 }
    217 
    218 int32_t SearchIterator::next(UErrorCode &status)
    219 {
    220     if (U_SUCCESS(status)) {
    221         int32_t offset = getOffset();
    222         int32_t matchindex  = m_search_->matchedIndex;
    223         int32_t     matchlength = m_search_->matchedLength;
    224         m_search_->reset = FALSE;
    225         if (m_search_->isForwardSearching == TRUE) {
    226             int32_t textlength = m_search_->textLength;
    227             if (offset == textlength || matchindex == textlength ||
    228                 (matchindex != USEARCH_DONE &&
    229                 matchindex + matchlength >= textlength)) {
    230                 // not enough characters to match
    231                 setMatchNotFound();
    232                 return USEARCH_DONE;
    233             }
    234         }
    235         else {
    236             // switching direction.
    237             // if matchedIndex == USEARCH_DONE, it means that either a
    238             // setOffset has been called or that previous ran off the text
    239             // string. the iterator would have been set to offset 0 if a
    240             // match is not found.
    241             m_search_->isForwardSearching = TRUE;
    242             if (m_search_->matchedIndex != USEARCH_DONE) {
    243                 // there's no need to set the collation element iterator
    244                 // the next call to next will set the offset.
    245                 return matchindex;
    246             }
    247         }
    248 
    249         if (matchlength > 0) {
    250             // if matchlength is 0 we are at the start of the iteration
    251             if (m_search_->isOverlap) {
    252                 offset ++;
    253             }
    254             else {
    255                 offset += matchlength;
    256             }
    257         }
    258         return handleNext(offset, status);
    259     }
    260     return USEARCH_DONE;
    261 }
    262 
    263 int32_t SearchIterator::previous(UErrorCode &status)
    264 {
    265     if (U_SUCCESS(status)) {
    266         int32_t offset;
    267         if (m_search_->reset) {
    268             offset                       = m_search_->textLength;
    269             m_search_->isForwardSearching = FALSE;
    270             m_search_->reset              = FALSE;
    271             setOffset(offset, status);
    272         }
    273         else {
    274             offset = getOffset();
    275         }
    276 
    277         int32_t matchindex = m_search_->matchedIndex;
    278         if (m_search_->isForwardSearching == TRUE) {
    279             // switching direction.
    280             // if matchedIndex == USEARCH_DONE, it means that either a
    281             // setOffset has been called or that next ran off the text
    282             // string. the iterator would have been set to offset textLength if
    283             // a match is not found.
    284             m_search_->isForwardSearching = FALSE;
    285             if (matchindex != USEARCH_DONE) {
    286                 return matchindex;
    287             }
    288         }
    289         else {
    290             if (offset == 0 || matchindex == 0) {
    291                 // not enough characters to match
    292                 setMatchNotFound();
    293                 return USEARCH_DONE;
    294             }
    295         }
    296 
    297         if (matchindex != USEARCH_DONE) {
    298             if (m_search_->isOverlap) {
    299                 matchindex += m_search_->matchedLength - 2;
    300             }
    301 
    302             return handlePrev(matchindex, status);
    303         }
    304 
    305         return handlePrev(offset, status);
    306     }
    307 
    308     return USEARCH_DONE;
    309 }
    310 
    311 void SearchIterator::reset()
    312 {
    313     UErrorCode status = U_ZERO_ERROR;
    314     setMatchNotFound();
    315     setOffset(0, status);
    316     m_search_->isOverlap          = FALSE;
    317     m_search_->isCanonicalMatch   = FALSE;
    318     m_search_->isForwardSearching = TRUE;
    319     m_search_->reset              = TRUE;
    320 }
    321 
    322 // protected constructors and destructors -----------------------------
    323 
    324 SearchIterator::SearchIterator()
    325 {
    326     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    327     m_search_->breakIter          = NULL;
    328     m_search_->isOverlap          = FALSE;
    329     m_search_->isCanonicalMatch   = FALSE;
    330     m_search_->isForwardSearching = TRUE;
    331     m_search_->reset              = TRUE;
    332     m_search_->matchedIndex       = USEARCH_DONE;
    333     m_search_->matchedLength      = 0;
    334     m_search_->text               = NULL;
    335     m_search_->textLength         = 0;
    336     m_breakiterator_              = NULL;
    337 }
    338 
    339 SearchIterator::SearchIterator(const UnicodeString &text,
    340                                      BreakIterator *breakiter) :
    341                                      m_breakiterator_(breakiter),
    342                                      m_text_(text)
    343 {
    344     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    345     m_search_->breakIter          = NULL;
    346     m_search_->isOverlap          = FALSE;
    347     m_search_->isCanonicalMatch   = FALSE;
    348     m_search_->isForwardSearching = TRUE;
    349     m_search_->reset              = TRUE;
    350     m_search_->matchedIndex       = USEARCH_DONE;
    351     m_search_->matchedLength      = 0;
    352     m_search_->text               = m_text_.getBuffer();
    353     m_search_->textLength         = text.length();
    354 }
    355 
    356 SearchIterator::SearchIterator(CharacterIterator &text,
    357                                BreakIterator     *breakiter) :
    358                                m_breakiterator_(breakiter)
    359 {
    360     m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
    361     m_search_->breakIter          = NULL;
    362     m_search_->isOverlap          = FALSE;
    363     m_search_->isCanonicalMatch   = FALSE;
    364     m_search_->isForwardSearching = TRUE;
    365     m_search_->reset              = TRUE;
    366     m_search_->matchedIndex       = USEARCH_DONE;
    367     m_search_->matchedLength      = 0;
    368     text.getText(m_text_);
    369     m_search_->text               = m_text_.getBuffer();
    370     m_search_->textLength         = m_text_.length();
    371     m_breakiterator_             = breakiter;
    372 }
    373 
    374 // protected methods ------------------------------------------------------
    375 
    376 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
    377 {
    378     if (this != &that) {
    379         m_breakiterator_            = that.m_breakiterator_;
    380         m_text_                     = that.m_text_;
    381         m_search_->breakIter        = that.m_search_->breakIter;
    382         m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
    383         m_search_->isOverlap        = that.m_search_->isOverlap;
    384         m_search_->matchedIndex     = that.m_search_->matchedIndex;
    385         m_search_->matchedLength    = that.m_search_->matchedLength;
    386         m_search_->text             = that.m_search_->text;
    387         m_search_->textLength       = that.m_search_->textLength;
    388     }
    389     return *this;
    390 }
    391 
    392 void SearchIterator::setMatchLength(int32_t length)
    393 {
    394     m_search_->matchedLength = length;
    395 }
    396 
    397 void SearchIterator::setMatchStart(int32_t position)
    398 {
    399     m_search_->matchedIndex = position;
    400 }
    401 
    402 void SearchIterator::setMatchNotFound()
    403 {
    404     setMatchStart(USEARCH_DONE);
    405     setMatchLength(0);
    406     UErrorCode status = U_ZERO_ERROR;
    407     // by default no errors should be returned here since offsets are within
    408     // range.
    409     if (m_search_->isForwardSearching) {
    410         setOffset(m_search_->textLength, status);
    411     }
    412     else {
    413         setOffset(0, status);
    414     }
    415 }
    416 
    417 
    418 U_NAMESPACE_END
    419 
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
    421