1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. 6 ********************************************************************** 7 * Date Name Description 8 * 03/22/2000 helena Creation. 9 ********************************************************************** 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 15 16 #include "unicode/brkiter.h" 17 #include "unicode/schriter.h" 18 #include "unicode/search.h" 19 #include "usrchimp.h" 20 #include "cmemory.h" 21 22 // public constructors and destructors ----------------------------------- 23 U_NAMESPACE_BEGIN 24 25 SearchIterator::SearchIterator(const SearchIterator &other) 26 : UObject(other) 27 { 28 m_breakiterator_ = other.m_breakiterator_; 29 m_text_ = other.m_text_; 30 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 31 m_search_->breakIter = other.m_search_->breakIter; 32 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; 33 m_search_->isOverlap = other.m_search_->isOverlap; 34 m_search_->elementComparisonType = other.m_search_->elementComparisonType; 35 m_search_->matchedIndex = other.m_search_->matchedIndex; 36 m_search_->matchedLength = other.m_search_->matchedLength; 37 m_search_->text = other.m_search_->text; 38 m_search_->textLength = other.m_search_->textLength; 39 } 40 41 SearchIterator::~SearchIterator() 42 { 43 if (m_search_ != NULL) { 44 uprv_free(m_search_); 45 } 46 } 47 48 // public get and set methods ---------------------------------------- 49 50 void SearchIterator::setAttribute(USearchAttribute attribute, 51 USearchAttributeValue value, 52 UErrorCode &status) 53 { 54 if (U_SUCCESS(status)) { 55 switch (attribute) 56 { 57 case USEARCH_OVERLAP : 58 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); 59 break; 60 case USEARCH_CANONICAL_MATCH : 61 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); 62 break; 63 case USEARCH_ELEMENT_COMPARISON : 64 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { 65 m_search_->elementComparisonType = (int16_t)value; 66 } else { 67 m_search_->elementComparisonType = 0; 68 } 69 break; 70 default: 71 status = U_ILLEGAL_ARGUMENT_ERROR; 72 } 73 } 74 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { 75 status = U_ILLEGAL_ARGUMENT_ERROR; 76 } 77 } 78 79 USearchAttributeValue SearchIterator::getAttribute( 80 USearchAttribute attribute) const 81 { 82 switch (attribute) { 83 case USEARCH_OVERLAP : 84 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); 85 case USEARCH_CANONICAL_MATCH : 86 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 87 USEARCH_OFF); 88 case USEARCH_ELEMENT_COMPARISON : 89 { 90 int16_t value = m_search_->elementComparisonType; 91 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { 92 return (USearchAttributeValue)value; 93 } else { 94 return USEARCH_STANDARD_ELEMENT_COMPARISON; 95 } 96 } 97 default : 98 return USEARCH_DEFAULT; 99 } 100 } 101 102 int32_t SearchIterator::getMatchedStart() const 103 { 104 return m_search_->matchedIndex; 105 } 106 107 int32_t SearchIterator::getMatchedLength() const 108 { 109 return m_search_->matchedLength; 110 } 111 112 void SearchIterator::getMatchedText(UnicodeString &result) const 113 { 114 int32_t matchedindex = m_search_->matchedIndex; 115 int32_t matchedlength = m_search_->matchedLength; 116 if (matchedindex != USEARCH_DONE && matchedlength != 0) { 117 result.setTo(m_search_->text + matchedindex, matchedlength); 118 } 119 else { 120 result.remove(); 121 } 122 } 123 124 void SearchIterator::setBreakIterator(BreakIterator *breakiter, 125 UErrorCode &status) 126 { 127 if (U_SUCCESS(status)) { 128 #if 0 129 m_search_->breakIter = NULL; 130 // the c++ breakiterator may not make use of ubreakiterator. 131 // so we'll have to keep track of it ourselves. 132 #else 133 // Well, gee... the Constructors that take a BreakIterator 134 // all cast the BreakIterator to a UBreakIterator and 135 // pass it to the corresponding usearch_openFromXXX 136 // routine, so there's no reason not to do this. 137 // 138 // Besides, a UBreakIterator is a BreakIterator, so 139 // any subclass of BreakIterator should work fine here... 140 m_search_->breakIter = (UBreakIterator *) breakiter; 141 #endif 142 143 m_breakiterator_ = breakiter; 144 } 145 } 146 147 const BreakIterator * SearchIterator::getBreakIterator(void) const 148 { 149 return m_breakiterator_; 150 } 151 152 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) 153 { 154 if (U_SUCCESS(status)) { 155 if (text.length() == 0) { 156 status = U_ILLEGAL_ARGUMENT_ERROR; 157 } 158 else { 159 m_text_ = text; 160 m_search_->text = m_text_.getBuffer(); 161 m_search_->textLength = m_text_.length(); 162 } 163 } 164 } 165 166 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) 167 { 168 if (U_SUCCESS(status)) { 169 text.getText(m_text_); 170 setText(m_text_, status); 171 } 172 } 173 174 const UnicodeString & SearchIterator::getText(void) const 175 { 176 return m_text_; 177 } 178 179 // operator overloading ---------------------------------------------- 180 181 UBool SearchIterator::operator==(const SearchIterator &that) const 182 { 183 if (this == &that) { 184 return TRUE; 185 } 186 return (m_breakiterator_ == that.m_breakiterator_ && 187 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && 188 m_search_->isOverlap == that.m_search_->isOverlap && 189 m_search_->elementComparisonType == that.m_search_->elementComparisonType && 190 m_search_->matchedIndex == that.m_search_->matchedIndex && 191 m_search_->matchedLength == that.m_search_->matchedLength && 192 m_search_->textLength == that.m_search_->textLength && 193 getOffset() == that.getOffset() && 194 (uprv_memcmp(m_search_->text, that.m_search_->text, 195 m_search_->textLength * sizeof(UChar)) == 0)); 196 } 197 198 // public methods ---------------------------------------------------- 199 200 int32_t SearchIterator::first(UErrorCode &status) 201 { 202 if (U_FAILURE(status)) { 203 return USEARCH_DONE; 204 } 205 setOffset(0, status); 206 return handleNext(0, status); 207 } 208 209 int32_t SearchIterator::following(int32_t position, 210 UErrorCode &status) 211 { 212 if (U_FAILURE(status)) { 213 return USEARCH_DONE; 214 } 215 setOffset(position, status); 216 return handleNext(position, status); 217 } 218 219 int32_t SearchIterator::last(UErrorCode &status) 220 { 221 if (U_FAILURE(status)) { 222 return USEARCH_DONE; 223 } 224 setOffset(m_search_->textLength, status); 225 return handlePrev(m_search_->textLength, status); 226 } 227 228 int32_t SearchIterator::preceding(int32_t position, 229 UErrorCode &status) 230 { 231 if (U_FAILURE(status)) { 232 return USEARCH_DONE; 233 } 234 setOffset(position, status); 235 return handlePrev(position, status); 236 } 237 238 int32_t SearchIterator::next(UErrorCode &status) 239 { 240 if (U_SUCCESS(status)) { 241 int32_t offset = getOffset(); 242 int32_t matchindex = m_search_->matchedIndex; 243 int32_t matchlength = m_search_->matchedLength; 244 m_search_->reset = FALSE; 245 if (m_search_->isForwardSearching == TRUE) { 246 int32_t textlength = m_search_->textLength; 247 if (offset == textlength || matchindex == textlength || 248 (matchindex != USEARCH_DONE && 249 matchindex + matchlength >= textlength)) { 250 // not enough characters to match 251 setMatchNotFound(); 252 return USEARCH_DONE; 253 } 254 } 255 else { 256 // switching direction. 257 // if matchedIndex == USEARCH_DONE, it means that either a 258 // setOffset has been called or that previous ran off the text 259 // string. the iterator would have been set to offset 0 if a 260 // match is not found. 261 m_search_->isForwardSearching = TRUE; 262 if (m_search_->matchedIndex != USEARCH_DONE) { 263 // there's no need to set the collation element iterator 264 // the next call to next will set the offset. 265 return matchindex; 266 } 267 } 268 269 if (matchlength > 0) { 270 // if matchlength is 0 we are at the start of the iteration 271 if (m_search_->isOverlap) { 272 offset ++; 273 } 274 else { 275 offset += matchlength; 276 } 277 } 278 return handleNext(offset, status); 279 } 280 return USEARCH_DONE; 281 } 282 283 int32_t SearchIterator::previous(UErrorCode &status) 284 { 285 if (U_SUCCESS(status)) { 286 int32_t offset; 287 if (m_search_->reset) { 288 offset = m_search_->textLength; 289 m_search_->isForwardSearching = FALSE; 290 m_search_->reset = FALSE; 291 setOffset(offset, status); 292 } 293 else { 294 offset = getOffset(); 295 } 296 297 int32_t matchindex = m_search_->matchedIndex; 298 if (m_search_->isForwardSearching == TRUE) { 299 // switching direction. 300 // if matchedIndex == USEARCH_DONE, it means that either a 301 // setOffset has been called or that next ran off the text 302 // string. the iterator would have been set to offset textLength if 303 // a match is not found. 304 m_search_->isForwardSearching = FALSE; 305 if (matchindex != USEARCH_DONE) { 306 return matchindex; 307 } 308 } 309 else { 310 if (offset == 0 || matchindex == 0) { 311 // not enough characters to match 312 setMatchNotFound(); 313 return USEARCH_DONE; 314 } 315 } 316 317 if (matchindex != USEARCH_DONE) { 318 if (m_search_->isOverlap) { 319 matchindex += m_search_->matchedLength - 2; 320 } 321 322 return handlePrev(matchindex, status); 323 } 324 325 return handlePrev(offset, status); 326 } 327 328 return USEARCH_DONE; 329 } 330 331 void SearchIterator::reset() 332 { 333 UErrorCode status = U_ZERO_ERROR; 334 setMatchNotFound(); 335 setOffset(0, status); 336 m_search_->isOverlap = FALSE; 337 m_search_->isCanonicalMatch = FALSE; 338 m_search_->elementComparisonType = 0; 339 m_search_->isForwardSearching = TRUE; 340 m_search_->reset = TRUE; 341 } 342 343 // protected constructors and destructors ----------------------------- 344 345 SearchIterator::SearchIterator() 346 { 347 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 348 m_search_->breakIter = NULL; 349 m_search_->isOverlap = FALSE; 350 m_search_->isCanonicalMatch = FALSE; 351 m_search_->elementComparisonType = 0; 352 m_search_->isForwardSearching = TRUE; 353 m_search_->reset = TRUE; 354 m_search_->matchedIndex = USEARCH_DONE; 355 m_search_->matchedLength = 0; 356 m_search_->text = NULL; 357 m_search_->textLength = 0; 358 m_breakiterator_ = NULL; 359 } 360 361 SearchIterator::SearchIterator(const UnicodeString &text, 362 BreakIterator *breakiter) : 363 m_breakiterator_(breakiter), 364 m_text_(text) 365 { 366 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 367 m_search_->breakIter = NULL; 368 m_search_->isOverlap = FALSE; 369 m_search_->isCanonicalMatch = FALSE; 370 m_search_->elementComparisonType = 0; 371 m_search_->isForwardSearching = TRUE; 372 m_search_->reset = TRUE; 373 m_search_->matchedIndex = USEARCH_DONE; 374 m_search_->matchedLength = 0; 375 m_search_->text = m_text_.getBuffer(); 376 m_search_->textLength = text.length(); 377 } 378 379 SearchIterator::SearchIterator(CharacterIterator &text, 380 BreakIterator *breakiter) : 381 m_breakiterator_(breakiter) 382 { 383 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 384 m_search_->breakIter = NULL; 385 m_search_->isOverlap = FALSE; 386 m_search_->isCanonicalMatch = FALSE; 387 m_search_->elementComparisonType = 0; 388 m_search_->isForwardSearching = TRUE; 389 m_search_->reset = TRUE; 390 m_search_->matchedIndex = USEARCH_DONE; 391 m_search_->matchedLength = 0; 392 text.getText(m_text_); 393 m_search_->text = m_text_.getBuffer(); 394 m_search_->textLength = m_text_.length(); 395 m_breakiterator_ = breakiter; 396 } 397 398 // protected methods ------------------------------------------------------ 399 400 SearchIterator & SearchIterator::operator=(const SearchIterator &that) 401 { 402 if (this != &that) { 403 m_breakiterator_ = that.m_breakiterator_; 404 m_text_ = that.m_text_; 405 m_search_->breakIter = that.m_search_->breakIter; 406 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; 407 m_search_->isOverlap = that.m_search_->isOverlap; 408 m_search_->elementComparisonType = that.m_search_->elementComparisonType; 409 m_search_->matchedIndex = that.m_search_->matchedIndex; 410 m_search_->matchedLength = that.m_search_->matchedLength; 411 m_search_->text = that.m_search_->text; 412 m_search_->textLength = that.m_search_->textLength; 413 } 414 return *this; 415 } 416 417 void SearchIterator::setMatchLength(int32_t length) 418 { 419 m_search_->matchedLength = length; 420 } 421 422 void SearchIterator::setMatchStart(int32_t position) 423 { 424 m_search_->matchedIndex = position; 425 } 426 427 void SearchIterator::setMatchNotFound() 428 { 429 setMatchStart(USEARCH_DONE); 430 setMatchLength(0); 431 UErrorCode status = U_ZERO_ERROR; 432 // by default no errors should be returned here since offsets are within 433 // range. 434 if (m_search_->isForwardSearching) { 435 setOffset(m_search_->textLength, status); 436 } 437 else { 438 setOffset(0, status); 439 } 440 } 441 442 443 U_NAMESPACE_END 444 445 #endif /* #if !UCONFIG_NO_COLLATION */ 446