1 /* 2 ********************************************************************** 3 * Copyright (C) 2001-2008,2010 IBM and others. All rights reserved. 4 ********************************************************************** 5 * Date Name Description 6 * 03/22/2000 helena Creation. 7 ********************************************************************** 8 */ 9 10 #include "unicode/utypes.h" 11 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 13 14 #include "unicode/brkiter.h" 15 #include "unicode/schriter.h" 16 #include "unicode/search.h" 17 #include "usrchimp.h" 18 #include "cmemory.h" 19 20 // public constructors and destructors ----------------------------------- 21 U_NAMESPACE_BEGIN 22 23 SearchIterator::SearchIterator(const SearchIterator &other) 24 : UObject(other) 25 { 26 m_breakiterator_ = other.m_breakiterator_; 27 m_text_ = other.m_text_; 28 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 29 m_search_->breakIter = other.m_search_->breakIter; 30 m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch; 31 m_search_->isOverlap = other.m_search_->isOverlap; 32 m_search_->elementComparisonType = other.m_search_->elementComparisonType; 33 m_search_->matchedIndex = other.m_search_->matchedIndex; 34 m_search_->matchedLength = other.m_search_->matchedLength; 35 m_search_->text = other.m_search_->text; 36 m_search_->textLength = other.m_search_->textLength; 37 } 38 39 SearchIterator::~SearchIterator() 40 { 41 if (m_search_ != NULL) { 42 uprv_free(m_search_); 43 } 44 } 45 46 // public get and set methods ---------------------------------------- 47 48 void SearchIterator::setAttribute(USearchAttribute attribute, 49 USearchAttributeValue value, 50 UErrorCode &status) 51 { 52 if (U_SUCCESS(status)) { 53 switch (attribute) 54 { 55 case USEARCH_OVERLAP : 56 m_search_->isOverlap = (value == USEARCH_ON ? TRUE : FALSE); 57 break; 58 case USEARCH_CANONICAL_MATCH : 59 m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE); 60 break; 61 case USEARCH_ELEMENT_COMPARISON : 62 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { 63 m_search_->elementComparisonType = (int16_t)value; 64 } else { 65 m_search_->elementComparisonType = 0; 66 } 67 break; 68 default: 69 status = U_ILLEGAL_ARGUMENT_ERROR; 70 } 71 } 72 if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) { 73 status = U_ILLEGAL_ARGUMENT_ERROR; 74 } 75 } 76 77 USearchAttributeValue SearchIterator::getAttribute( 78 USearchAttribute attribute) const 79 { 80 switch (attribute) { 81 case USEARCH_OVERLAP : 82 return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF); 83 case USEARCH_CANONICAL_MATCH : 84 return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 85 USEARCH_OFF); 86 case USEARCH_ELEMENT_COMPARISON : 87 { 88 int16_t value = m_search_->elementComparisonType; 89 if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) { 90 return (USearchAttributeValue)value; 91 } else { 92 return USEARCH_STANDARD_ELEMENT_COMPARISON; 93 } 94 } 95 default : 96 return USEARCH_DEFAULT; 97 } 98 } 99 100 int32_t SearchIterator::getMatchedStart() const 101 { 102 return m_search_->matchedIndex; 103 } 104 105 int32_t SearchIterator::getMatchedLength() const 106 { 107 return m_search_->matchedLength; 108 } 109 110 void SearchIterator::getMatchedText(UnicodeString &result) const 111 { 112 int32_t matchedindex = m_search_->matchedIndex; 113 int32_t matchedlength = m_search_->matchedLength; 114 if (matchedindex != USEARCH_DONE && matchedlength != 0) { 115 result.setTo(m_search_->text + matchedindex, matchedlength); 116 } 117 else { 118 result.remove(); 119 } 120 } 121 122 void SearchIterator::setBreakIterator(BreakIterator *breakiter, 123 UErrorCode &status) 124 { 125 if (U_SUCCESS(status)) { 126 #if 0 127 m_search_->breakIter = NULL; 128 // the c++ breakiterator may not make use of ubreakiterator. 129 // so we'll have to keep track of it ourselves. 130 #else 131 // Well, gee... the Constructors that take a BreakIterator 132 // all cast the BreakIterator to a UBreakIterator and 133 // pass it to the corresponding usearch_openFromXXX 134 // routine, so there's no reason not to do this. 135 // 136 // Besides, a UBreakIterator is a BreakIterator, so 137 // any subclass of BreakIterator should work fine here... 138 m_search_->breakIter = (UBreakIterator *) breakiter; 139 #endif 140 141 m_breakiterator_ = breakiter; 142 } 143 } 144 145 const BreakIterator * SearchIterator::getBreakIterator(void) const 146 { 147 return m_breakiterator_; 148 } 149 150 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status) 151 { 152 if (U_SUCCESS(status)) { 153 if (text.length() == 0) { 154 status = U_ILLEGAL_ARGUMENT_ERROR; 155 } 156 else { 157 m_text_ = text; 158 m_search_->text = m_text_.getBuffer(); 159 m_search_->textLength = m_text_.length(); 160 } 161 } 162 } 163 164 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status) 165 { 166 if (U_SUCCESS(status)) { 167 text.getText(m_text_); 168 setText(m_text_, status); 169 } 170 } 171 172 const UnicodeString & SearchIterator::getText(void) const 173 { 174 return m_text_; 175 } 176 177 // operator overloading ---------------------------------------------- 178 179 UBool SearchIterator::operator==(const SearchIterator &that) const 180 { 181 if (this == &that) { 182 return TRUE; 183 } 184 return (m_breakiterator_ == that.m_breakiterator_ && 185 m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch && 186 m_search_->isOverlap == that.m_search_->isOverlap && 187 m_search_->elementComparisonType == that.m_search_->elementComparisonType && 188 m_search_->matchedIndex == that.m_search_->matchedIndex && 189 m_search_->matchedLength == that.m_search_->matchedLength && 190 m_search_->textLength == that.m_search_->textLength && 191 getOffset() == that.getOffset() && 192 (uprv_memcmp(m_search_->text, that.m_search_->text, 193 m_search_->textLength * sizeof(UChar)) == 0)); 194 } 195 196 // public methods ---------------------------------------------------- 197 198 int32_t SearchIterator::first(UErrorCode &status) 199 { 200 if (U_FAILURE(status)) { 201 return USEARCH_DONE; 202 } 203 setOffset(0, status); 204 return handleNext(0, status); 205 } 206 207 int32_t SearchIterator::following(int32_t position, 208 UErrorCode &status) 209 { 210 if (U_FAILURE(status)) { 211 return USEARCH_DONE; 212 } 213 setOffset(position, status); 214 return handleNext(position, status); 215 } 216 217 int32_t SearchIterator::last(UErrorCode &status) 218 { 219 if (U_FAILURE(status)) { 220 return USEARCH_DONE; 221 } 222 setOffset(m_search_->textLength, status); 223 return handlePrev(m_search_->textLength, status); 224 } 225 226 int32_t SearchIterator::preceding(int32_t position, 227 UErrorCode &status) 228 { 229 if (U_FAILURE(status)) { 230 return USEARCH_DONE; 231 } 232 setOffset(position, status); 233 return handlePrev(position, status); 234 } 235 236 int32_t SearchIterator::next(UErrorCode &status) 237 { 238 if (U_SUCCESS(status)) { 239 int32_t offset = getOffset(); 240 int32_t matchindex = m_search_->matchedIndex; 241 int32_t matchlength = m_search_->matchedLength; 242 m_search_->reset = FALSE; 243 if (m_search_->isForwardSearching == TRUE) { 244 int32_t textlength = m_search_->textLength; 245 if (offset == textlength || matchindex == textlength || 246 (matchindex != USEARCH_DONE && 247 matchindex + matchlength >= textlength)) { 248 // not enough characters to match 249 setMatchNotFound(); 250 return USEARCH_DONE; 251 } 252 } 253 else { 254 // switching direction. 255 // if matchedIndex == USEARCH_DONE, it means that either a 256 // setOffset has been called or that previous ran off the text 257 // string. the iterator would have been set to offset 0 if a 258 // match is not found. 259 m_search_->isForwardSearching = TRUE; 260 if (m_search_->matchedIndex != USEARCH_DONE) { 261 // there's no need to set the collation element iterator 262 // the next call to next will set the offset. 263 return matchindex; 264 } 265 } 266 267 if (matchlength > 0) { 268 // if matchlength is 0 we are at the start of the iteration 269 if (m_search_->isOverlap) { 270 offset ++; 271 } 272 else { 273 offset += matchlength; 274 } 275 } 276 return handleNext(offset, status); 277 } 278 return USEARCH_DONE; 279 } 280 281 int32_t SearchIterator::previous(UErrorCode &status) 282 { 283 if (U_SUCCESS(status)) { 284 int32_t offset; 285 if (m_search_->reset) { 286 offset = m_search_->textLength; 287 m_search_->isForwardSearching = FALSE; 288 m_search_->reset = FALSE; 289 setOffset(offset, status); 290 } 291 else { 292 offset = getOffset(); 293 } 294 295 int32_t matchindex = m_search_->matchedIndex; 296 if (m_search_->isForwardSearching == TRUE) { 297 // switching direction. 298 // if matchedIndex == USEARCH_DONE, it means that either a 299 // setOffset has been called or that next ran off the text 300 // string. the iterator would have been set to offset textLength if 301 // a match is not found. 302 m_search_->isForwardSearching = FALSE; 303 if (matchindex != USEARCH_DONE) { 304 return matchindex; 305 } 306 } 307 else { 308 if (offset == 0 || matchindex == 0) { 309 // not enough characters to match 310 setMatchNotFound(); 311 return USEARCH_DONE; 312 } 313 } 314 315 if (matchindex != USEARCH_DONE) { 316 if (m_search_->isOverlap) { 317 matchindex += m_search_->matchedLength - 2; 318 } 319 320 return handlePrev(matchindex, status); 321 } 322 323 return handlePrev(offset, status); 324 } 325 326 return USEARCH_DONE; 327 } 328 329 void SearchIterator::reset() 330 { 331 UErrorCode status = U_ZERO_ERROR; 332 setMatchNotFound(); 333 setOffset(0, status); 334 m_search_->isOverlap = FALSE; 335 m_search_->isCanonicalMatch = FALSE; 336 m_search_->elementComparisonType = 0; 337 m_search_->isForwardSearching = TRUE; 338 m_search_->reset = TRUE; 339 } 340 341 // protected constructors and destructors ----------------------------- 342 343 SearchIterator::SearchIterator() 344 { 345 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 346 m_search_->breakIter = NULL; 347 m_search_->isOverlap = FALSE; 348 m_search_->isCanonicalMatch = FALSE; 349 m_search_->elementComparisonType = 0; 350 m_search_->isForwardSearching = TRUE; 351 m_search_->reset = TRUE; 352 m_search_->matchedIndex = USEARCH_DONE; 353 m_search_->matchedLength = 0; 354 m_search_->text = NULL; 355 m_search_->textLength = 0; 356 m_breakiterator_ = NULL; 357 } 358 359 SearchIterator::SearchIterator(const UnicodeString &text, 360 BreakIterator *breakiter) : 361 m_breakiterator_(breakiter), 362 m_text_(text) 363 { 364 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 365 m_search_->breakIter = NULL; 366 m_search_->isOverlap = FALSE; 367 m_search_->isCanonicalMatch = FALSE; 368 m_search_->elementComparisonType = 0; 369 m_search_->isForwardSearching = TRUE; 370 m_search_->reset = TRUE; 371 m_search_->matchedIndex = USEARCH_DONE; 372 m_search_->matchedLength = 0; 373 m_search_->text = m_text_.getBuffer(); 374 m_search_->textLength = text.length(); 375 } 376 377 SearchIterator::SearchIterator(CharacterIterator &text, 378 BreakIterator *breakiter) : 379 m_breakiterator_(breakiter) 380 { 381 m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); 382 m_search_->breakIter = NULL; 383 m_search_->isOverlap = FALSE; 384 m_search_->isCanonicalMatch = FALSE; 385 m_search_->elementComparisonType = 0; 386 m_search_->isForwardSearching = TRUE; 387 m_search_->reset = TRUE; 388 m_search_->matchedIndex = USEARCH_DONE; 389 m_search_->matchedLength = 0; 390 text.getText(m_text_); 391 m_search_->text = m_text_.getBuffer(); 392 m_search_->textLength = m_text_.length(); 393 m_breakiterator_ = breakiter; 394 } 395 396 // protected methods ------------------------------------------------------ 397 398 SearchIterator & SearchIterator::operator=(const SearchIterator &that) 399 { 400 if (this != &that) { 401 m_breakiterator_ = that.m_breakiterator_; 402 m_text_ = that.m_text_; 403 m_search_->breakIter = that.m_search_->breakIter; 404 m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch; 405 m_search_->isOverlap = that.m_search_->isOverlap; 406 m_search_->elementComparisonType = that.m_search_->elementComparisonType; 407 m_search_->matchedIndex = that.m_search_->matchedIndex; 408 m_search_->matchedLength = that.m_search_->matchedLength; 409 m_search_->text = that.m_search_->text; 410 m_search_->textLength = that.m_search_->textLength; 411 } 412 return *this; 413 } 414 415 void SearchIterator::setMatchLength(int32_t length) 416 { 417 m_search_->matchedLength = length; 418 } 419 420 void SearchIterator::setMatchStart(int32_t position) 421 { 422 m_search_->matchedIndex = position; 423 } 424 425 void SearchIterator::setMatchNotFound() 426 { 427 setMatchStart(USEARCH_DONE); 428 setMatchLength(0); 429 UErrorCode status = U_ZERO_ERROR; 430 // by default no errors should be returned here since offsets are within 431 // range. 432 if (m_search_->isForwardSearching) { 433 setOffset(m_search_->textLength, status); 434 } 435 else { 436 setOffset(0, status); 437 } 438 } 439 440 441 U_NAMESPACE_END 442 443 #endif /* #if !UCONFIG_NO_COLLATION */ 444