1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2001-2014 IBM and others. All rights reserved. 6 ********************************************************************** 7 * Date Name Description 8 * 03/22/2000 helena Creation. 9 ********************************************************************** 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 15 16 #include "unicode/stsearch.h" 17 #include "usrchimp.h" 18 #include "cmemory.h" 19 20 U_NAMESPACE_BEGIN 21 22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) 23 24 // public constructors and destructors ----------------------------------- 25 26 StringSearch::StringSearch(const UnicodeString &pattern, 27 const UnicodeString &text, 28 const Locale &locale, 29 BreakIterator *breakiter, 30 UErrorCode &status) : 31 SearchIterator(text, breakiter), 32 m_pattern_(pattern) 33 { 34 if (U_FAILURE(status)) { 35 m_strsrch_ = NULL; 36 return; 37 } 38 39 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 40 m_text_.getBuffer(), m_text_.length(), 41 locale.getName(), (UBreakIterator *)breakiter, 42 &status); 43 uprv_free(m_search_); 44 m_search_ = NULL; 45 46 if (U_SUCCESS(status)) { 47 // m_search_ has been created by the base SearchIterator class 48 m_search_ = m_strsrch_->search; 49 } 50 } 51 52 StringSearch::StringSearch(const UnicodeString &pattern, 53 const UnicodeString &text, 54 RuleBasedCollator *coll, 55 BreakIterator *breakiter, 56 UErrorCode &status) : 57 SearchIterator(text, breakiter), 58 m_pattern_(pattern) 59 { 60 if (U_FAILURE(status)) { 61 m_strsrch_ = NULL; 62 return; 63 } 64 if (coll == NULL) { 65 status = U_ILLEGAL_ARGUMENT_ERROR; 66 m_strsrch_ = NULL; 67 return; 68 } 69 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 70 m_pattern_.length(), 71 m_text_.getBuffer(), 72 m_text_.length(), coll->toUCollator(), 73 (UBreakIterator *)breakiter, 74 &status); 75 uprv_free(m_search_); 76 m_search_ = NULL; 77 78 if (U_SUCCESS(status)) { 79 // m_search_ has been created by the base SearchIterator class 80 m_search_ = m_strsrch_->search; 81 } 82 } 83 84 StringSearch::StringSearch(const UnicodeString &pattern, 85 CharacterIterator &text, 86 const Locale &locale, 87 BreakIterator *breakiter, 88 UErrorCode &status) : 89 SearchIterator(text, breakiter), 90 m_pattern_(pattern) 91 { 92 if (U_FAILURE(status)) { 93 m_strsrch_ = NULL; 94 return; 95 } 96 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 97 m_text_.getBuffer(), m_text_.length(), 98 locale.getName(), (UBreakIterator *)breakiter, 99 &status); 100 uprv_free(m_search_); 101 m_search_ = NULL; 102 103 if (U_SUCCESS(status)) { 104 // m_search_ has been created by the base SearchIterator class 105 m_search_ = m_strsrch_->search; 106 } 107 } 108 109 StringSearch::StringSearch(const UnicodeString &pattern, 110 CharacterIterator &text, 111 RuleBasedCollator *coll, 112 BreakIterator *breakiter, 113 UErrorCode &status) : 114 SearchIterator(text, breakiter), 115 m_pattern_(pattern) 116 { 117 if (U_FAILURE(status)) { 118 m_strsrch_ = NULL; 119 return; 120 } 121 if (coll == NULL) { 122 status = U_ILLEGAL_ARGUMENT_ERROR; 123 m_strsrch_ = NULL; 124 return; 125 } 126 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 127 m_pattern_.length(), 128 m_text_.getBuffer(), 129 m_text_.length(), coll->toUCollator(), 130 (UBreakIterator *)breakiter, 131 &status); 132 uprv_free(m_search_); 133 m_search_ = NULL; 134 135 if (U_SUCCESS(status)) { 136 // m_search_ has been created by the base SearchIterator class 137 m_search_ = m_strsrch_->search; 138 } 139 } 140 141 StringSearch::StringSearch(const StringSearch &that) : 142 SearchIterator(that.m_text_, that.m_breakiterator_), 143 m_pattern_(that.m_pattern_) 144 { 145 UErrorCode status = U_ZERO_ERROR; 146 147 // Free m_search_ from the superclass 148 uprv_free(m_search_); 149 m_search_ = NULL; 150 151 if (that.m_strsrch_ == NULL) { 152 // This was not a good copy 153 m_strsrch_ = NULL; 154 } 155 else { 156 // Make a deep copy 157 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 158 m_pattern_.length(), 159 m_text_.getBuffer(), 160 m_text_.length(), 161 that.m_strsrch_->collator, 162 (UBreakIterator *)that.m_breakiterator_, 163 &status); 164 if (U_SUCCESS(status)) { 165 // m_search_ has been created by the base SearchIterator class 166 m_search_ = m_strsrch_->search; 167 } 168 } 169 } 170 171 StringSearch::~StringSearch() 172 { 173 if (m_strsrch_ != NULL) { 174 usearch_close(m_strsrch_); 175 m_search_ = NULL; 176 } 177 } 178 179 StringSearch * 180 StringSearch::clone() const { 181 return new StringSearch(*this); 182 } 183 184 // operator overloading --------------------------------------------- 185 StringSearch & StringSearch::operator=(const StringSearch &that) 186 { 187 if ((*this) != that) { 188 UErrorCode status = U_ZERO_ERROR; 189 m_text_ = that.m_text_; 190 m_breakiterator_ = that.m_breakiterator_; 191 m_pattern_ = that.m_pattern_; 192 // all m_search_ in the parent class is linked up with m_strsrch_ 193 usearch_close(m_strsrch_); 194 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 195 m_pattern_.length(), 196 m_text_.getBuffer(), 197 m_text_.length(), 198 that.m_strsrch_->collator, 199 NULL, &status); 200 // Check null pointer 201 if (m_strsrch_ != NULL) { 202 m_search_ = m_strsrch_->search; 203 } 204 } 205 return *this; 206 } 207 208 UBool StringSearch::operator==(const SearchIterator &that) const 209 { 210 if (this == &that) { 211 return TRUE; 212 } 213 if (SearchIterator::operator ==(that)) { 214 StringSearch &thatsrch = (StringSearch &)that; 215 return (this->m_pattern_ == thatsrch.m_pattern_ && 216 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); 217 } 218 return FALSE; 219 } 220 221 // public get and set methods ---------------------------------------- 222 223 void StringSearch::setOffset(int32_t position, UErrorCode &status) 224 { 225 // status checked in usearch_setOffset 226 usearch_setOffset(m_strsrch_, position, &status); 227 } 228 229 int32_t StringSearch::getOffset(void) const 230 { 231 return usearch_getOffset(m_strsrch_); 232 } 233 234 void StringSearch::setText(const UnicodeString &text, UErrorCode &status) 235 { 236 if (U_SUCCESS(status)) { 237 m_text_ = text; 238 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); 239 } 240 } 241 242 void StringSearch::setText(CharacterIterator &text, UErrorCode &status) 243 { 244 if (U_SUCCESS(status)) { 245 text.getText(m_text_); 246 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); 247 } 248 } 249 250 RuleBasedCollator * StringSearch::getCollator() const 251 { 252 // Note the const_cast. It would be cleaner if this const method returned a const collator. 253 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator)); 254 } 255 256 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) 257 { 258 if (U_SUCCESS(status)) { 259 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status); 260 } 261 } 262 263 void StringSearch::setPattern(const UnicodeString &pattern, 264 UErrorCode &status) 265 { 266 if (U_SUCCESS(status)) { 267 m_pattern_ = pattern; 268 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), 269 &status); 270 } 271 } 272 273 const UnicodeString & StringSearch::getPattern() const 274 { 275 return m_pattern_; 276 } 277 278 // public methods ---------------------------------------------------- 279 280 void StringSearch::reset() 281 { 282 usearch_reset(m_strsrch_); 283 } 284 285 SearchIterator * StringSearch::safeClone(void) const 286 { 287 UErrorCode status = U_ZERO_ERROR; 288 StringSearch *result = new StringSearch(m_pattern_, m_text_, 289 getCollator(), 290 m_breakiterator_, 291 status); 292 /* test for NULL */ 293 if (result == 0) { 294 status = U_MEMORY_ALLOCATION_ERROR; 295 return 0; 296 } 297 result->setOffset(getOffset(), status); 298 result->setMatchStart(m_strsrch_->search->matchedIndex); 299 result->setMatchLength(m_strsrch_->search->matchedLength); 300 if (U_FAILURE(status)) { 301 return NULL; 302 } 303 return result; 304 } 305 306 // protected method ------------------------------------------------- 307 308 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) 309 { 310 // values passed here are already in the pre-shift position 311 if (U_SUCCESS(status)) { 312 if (m_strsrch_->pattern.cesLength == 0) { 313 m_search_->matchedIndex = 314 m_search_->matchedIndex == USEARCH_DONE ? 315 getOffset() : m_search_->matchedIndex + 1; 316 m_search_->matchedLength = 0; 317 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 318 &status); 319 if (m_search_->matchedIndex == m_search_->textLength) { 320 m_search_->matchedIndex = USEARCH_DONE; 321 } 322 } 323 else { 324 // looking at usearch.cpp, this part is shifted out to 325 // StringSearch instead of SearchIterator because m_strsrch_ is 326 // not accessible in SearchIterator 327 #if 0 328 if (position + m_strsrch_->pattern.defaultShiftSize 329 > m_search_->textLength) { 330 setMatchNotFound(); 331 return USEARCH_DONE; 332 } 333 #endif 334 if (m_search_->matchedLength <= 0) { 335 // the flipping direction issue has already been handled 336 // in next() 337 // for boundary check purposes. this will ensure that the 338 // next match will not preceed the current offset 339 // note search->matchedIndex will always be set to something 340 // in the code 341 m_search_->matchedIndex = position - 1; 342 } 343 344 ucol_setOffset(m_strsrch_->textIter, position, &status); 345 346 #if 0 347 for (;;) { 348 if (m_search_->isCanonicalMatch) { 349 // can't use exact here since extra accents are allowed. 350 usearch_handleNextCanonical(m_strsrch_, &status); 351 } 352 else { 353 usearch_handleNextExact(m_strsrch_, &status); 354 } 355 if (U_FAILURE(status)) { 356 return USEARCH_DONE; 357 } 358 if (m_breakiterator_ == NULL 359 #if !UCONFIG_NO_BREAK_ITERATION 360 || 361 m_search_->matchedIndex == USEARCH_DONE || 362 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 363 m_breakiterator_->isBoundary(m_search_->matchedIndex + 364 m_search_->matchedLength)) 365 #endif 366 ) { 367 if (m_search_->matchedIndex == USEARCH_DONE) { 368 ucol_setOffset(m_strsrch_->textIter, 369 m_search_->textLength, &status); 370 } 371 else { 372 ucol_setOffset(m_strsrch_->textIter, 373 m_search_->matchedIndex, &status); 374 } 375 return m_search_->matchedIndex; 376 } 377 } 378 #else 379 // if m_strsrch_->breakIter is always the same as m_breakiterator_ 380 // then we don't need to check the match boundaries here because 381 // usearch_handleNextXXX will already have done it. 382 if (m_search_->isCanonicalMatch) { 383 // *could* actually use exact here 'cause no extra accents allowed... 384 usearch_handleNextCanonical(m_strsrch_, &status); 385 } else { 386 usearch_handleNextExact(m_strsrch_, &status); 387 } 388 389 if (U_FAILURE(status)) { 390 return USEARCH_DONE; 391 } 392 393 if (m_search_->matchedIndex == USEARCH_DONE) { 394 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); 395 } else { 396 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); 397 } 398 399 return m_search_->matchedIndex; 400 #endif 401 } 402 } 403 return USEARCH_DONE; 404 } 405 406 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) 407 { 408 // values passed here are already in the pre-shift position 409 if (U_SUCCESS(status)) { 410 if (m_strsrch_->pattern.cesLength == 0) { 411 m_search_->matchedIndex = 412 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 413 m_search_->matchedIndex); 414 if (m_search_->matchedIndex == 0) { 415 setMatchNotFound(); 416 } 417 else { 418 m_search_->matchedIndex --; 419 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 420 &status); 421 m_search_->matchedLength = 0; 422 } 423 } 424 else { 425 // looking at usearch.cpp, this part is shifted out to 426 // StringSearch instead of SearchIterator because m_strsrch_ is 427 // not accessible in SearchIterator 428 #if 0 429 if (!m_search_->isOverlap && 430 position - m_strsrch_->pattern.defaultShiftSize < 0) { 431 setMatchNotFound(); 432 return USEARCH_DONE; 433 } 434 435 for (;;) { 436 if (m_search_->isCanonicalMatch) { 437 // can't use exact here since extra accents are allowed. 438 usearch_handlePreviousCanonical(m_strsrch_, &status); 439 } 440 else { 441 usearch_handlePreviousExact(m_strsrch_, &status); 442 } 443 if (U_FAILURE(status)) { 444 return USEARCH_DONE; 445 } 446 if (m_breakiterator_ == NULL 447 #if !UCONFIG_NO_BREAK_ITERATION 448 || 449 m_search_->matchedIndex == USEARCH_DONE || 450 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 451 m_breakiterator_->isBoundary(m_search_->matchedIndex + 452 m_search_->matchedLength)) 453 #endif 454 ) { 455 return m_search_->matchedIndex; 456 } 457 } 458 #else 459 ucol_setOffset(m_strsrch_->textIter, position, &status); 460 461 if (m_search_->isCanonicalMatch) { 462 // *could* use exact match here since extra accents *not* allowed! 463 usearch_handlePreviousCanonical(m_strsrch_, &status); 464 } else { 465 usearch_handlePreviousExact(m_strsrch_, &status); 466 } 467 468 if (U_FAILURE(status)) { 469 return USEARCH_DONE; 470 } 471 472 return m_search_->matchedIndex; 473 #endif 474 } 475 476 return m_search_->matchedIndex; 477 } 478 return USEARCH_DONE; 479 } 480 481 U_NAMESPACE_END 482 483 #endif /* #if !UCONFIG_NO_COLLATION */ 484