1 /* 2 ********************************************************************** 3 * Copyright (C) 2001-2014 IBM and others. All rights reserved. 4 ********************************************************************** 5 * Date Name Description 6 * 03/22/2000 helena Creation. 7 ********************************************************************** 8 */ 9 10 #include "unicode/utypes.h" 11 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 13 14 #include "unicode/stsearch.h" 15 #include "usrchimp.h" 16 #include "cmemory.h" 17 18 U_NAMESPACE_BEGIN 19 20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) 21 22 // public constructors and destructors ----------------------------------- 23 24 StringSearch::StringSearch(const UnicodeString &pattern, 25 const UnicodeString &text, 26 const Locale &locale, 27 BreakIterator *breakiter, 28 UErrorCode &status) : 29 SearchIterator(text, breakiter), 30 m_pattern_(pattern) 31 { 32 if (U_FAILURE(status)) { 33 m_strsrch_ = NULL; 34 return; 35 } 36 37 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 38 m_text_.getBuffer(), m_text_.length(), 39 locale.getName(), (UBreakIterator *)breakiter, 40 &status); 41 uprv_free(m_search_); 42 m_search_ = NULL; 43 44 if (U_SUCCESS(status)) { 45 // m_search_ has been created by the base SearchIterator class 46 m_search_ = m_strsrch_->search; 47 } 48 } 49 50 StringSearch::StringSearch(const UnicodeString &pattern, 51 const UnicodeString &text, 52 RuleBasedCollator *coll, 53 BreakIterator *breakiter, 54 UErrorCode &status) : 55 SearchIterator(text, breakiter), 56 m_pattern_(pattern) 57 { 58 if (U_FAILURE(status)) { 59 m_strsrch_ = NULL; 60 return; 61 } 62 if (coll == NULL) { 63 status = U_ILLEGAL_ARGUMENT_ERROR; 64 m_strsrch_ = NULL; 65 return; 66 } 67 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 68 m_pattern_.length(), 69 m_text_.getBuffer(), 70 m_text_.length(), coll->toUCollator(), 71 (UBreakIterator *)breakiter, 72 &status); 73 uprv_free(m_search_); 74 m_search_ = NULL; 75 76 if (U_SUCCESS(status)) { 77 // m_search_ has been created by the base SearchIterator class 78 m_search_ = m_strsrch_->search; 79 } 80 } 81 82 StringSearch::StringSearch(const UnicodeString &pattern, 83 CharacterIterator &text, 84 const Locale &locale, 85 BreakIterator *breakiter, 86 UErrorCode &status) : 87 SearchIterator(text, breakiter), 88 m_pattern_(pattern) 89 { 90 if (U_FAILURE(status)) { 91 m_strsrch_ = NULL; 92 return; 93 } 94 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 95 m_text_.getBuffer(), m_text_.length(), 96 locale.getName(), (UBreakIterator *)breakiter, 97 &status); 98 uprv_free(m_search_); 99 m_search_ = NULL; 100 101 if (U_SUCCESS(status)) { 102 // m_search_ has been created by the base SearchIterator class 103 m_search_ = m_strsrch_->search; 104 } 105 } 106 107 StringSearch::StringSearch(const UnicodeString &pattern, 108 CharacterIterator &text, 109 RuleBasedCollator *coll, 110 BreakIterator *breakiter, 111 UErrorCode &status) : 112 SearchIterator(text, breakiter), 113 m_pattern_(pattern) 114 { 115 if (U_FAILURE(status)) { 116 m_strsrch_ = NULL; 117 return; 118 } 119 if (coll == NULL) { 120 status = U_ILLEGAL_ARGUMENT_ERROR; 121 m_strsrch_ = NULL; 122 return; 123 } 124 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 125 m_pattern_.length(), 126 m_text_.getBuffer(), 127 m_text_.length(), coll->toUCollator(), 128 (UBreakIterator *)breakiter, 129 &status); 130 uprv_free(m_search_); 131 m_search_ = NULL; 132 133 if (U_SUCCESS(status)) { 134 // m_search_ has been created by the base SearchIterator class 135 m_search_ = m_strsrch_->search; 136 } 137 } 138 139 StringSearch::StringSearch(const StringSearch &that) : 140 SearchIterator(that.m_text_, that.m_breakiterator_), 141 m_pattern_(that.m_pattern_) 142 { 143 UErrorCode status = U_ZERO_ERROR; 144 145 // Free m_search_ from the superclass 146 uprv_free(m_search_); 147 m_search_ = NULL; 148 149 if (that.m_strsrch_ == NULL) { 150 // This was not a good copy 151 m_strsrch_ = NULL; 152 } 153 else { 154 // Make a deep copy 155 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 156 m_pattern_.length(), 157 m_text_.getBuffer(), 158 m_text_.length(), 159 that.m_strsrch_->collator, 160 (UBreakIterator *)that.m_breakiterator_, 161 &status); 162 if (U_SUCCESS(status)) { 163 // m_search_ has been created by the base SearchIterator class 164 m_search_ = m_strsrch_->search; 165 } 166 } 167 } 168 169 StringSearch::~StringSearch() 170 { 171 if (m_strsrch_ != NULL) { 172 usearch_close(m_strsrch_); 173 m_search_ = NULL; 174 } 175 } 176 177 StringSearch * 178 StringSearch::clone() const { 179 return new StringSearch(*this); 180 } 181 182 // operator overloading --------------------------------------------- 183 StringSearch & StringSearch::operator=(const StringSearch &that) 184 { 185 if ((*this) != that) { 186 UErrorCode status = U_ZERO_ERROR; 187 m_text_ = that.m_text_; 188 m_breakiterator_ = that.m_breakiterator_; 189 m_pattern_ = that.m_pattern_; 190 // all m_search_ in the parent class is linked up with m_strsrch_ 191 usearch_close(m_strsrch_); 192 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 193 m_pattern_.length(), 194 m_text_.getBuffer(), 195 m_text_.length(), 196 that.m_strsrch_->collator, 197 NULL, &status); 198 // Check null pointer 199 if (m_strsrch_ != NULL) { 200 m_search_ = m_strsrch_->search; 201 } 202 } 203 return *this; 204 } 205 206 UBool StringSearch::operator==(const SearchIterator &that) const 207 { 208 if (this == &that) { 209 return TRUE; 210 } 211 if (SearchIterator::operator ==(that)) { 212 StringSearch &thatsrch = (StringSearch &)that; 213 return (this->m_pattern_ == thatsrch.m_pattern_ && 214 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); 215 } 216 return FALSE; 217 } 218 219 // public get and set methods ---------------------------------------- 220 221 void StringSearch::setOffset(int32_t position, UErrorCode &status) 222 { 223 // status checked in usearch_setOffset 224 usearch_setOffset(m_strsrch_, position, &status); 225 } 226 227 int32_t StringSearch::getOffset(void) const 228 { 229 return usearch_getOffset(m_strsrch_); 230 } 231 232 void StringSearch::setText(const UnicodeString &text, UErrorCode &status) 233 { 234 if (U_SUCCESS(status)) { 235 m_text_ = text; 236 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); 237 } 238 } 239 240 void StringSearch::setText(CharacterIterator &text, UErrorCode &status) 241 { 242 if (U_SUCCESS(status)) { 243 text.getText(m_text_); 244 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); 245 } 246 } 247 248 RuleBasedCollator * StringSearch::getCollator() const 249 { 250 // Note the const_cast. It would be cleaner if this const method returned a const collator. 251 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator)); 252 } 253 254 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) 255 { 256 if (U_SUCCESS(status)) { 257 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status); 258 } 259 } 260 261 void StringSearch::setPattern(const UnicodeString &pattern, 262 UErrorCode &status) 263 { 264 if (U_SUCCESS(status)) { 265 m_pattern_ = pattern; 266 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), 267 &status); 268 } 269 } 270 271 const UnicodeString & StringSearch::getPattern() const 272 { 273 return m_pattern_; 274 } 275 276 // public methods ---------------------------------------------------- 277 278 void StringSearch::reset() 279 { 280 usearch_reset(m_strsrch_); 281 } 282 283 SearchIterator * StringSearch::safeClone(void) const 284 { 285 UErrorCode status = U_ZERO_ERROR; 286 StringSearch *result = new StringSearch(m_pattern_, m_text_, 287 getCollator(), 288 m_breakiterator_, 289 status); 290 /* test for NULL */ 291 if (result == 0) { 292 status = U_MEMORY_ALLOCATION_ERROR; 293 return 0; 294 } 295 result->setOffset(getOffset(), status); 296 result->setMatchStart(m_strsrch_->search->matchedIndex); 297 result->setMatchLength(m_strsrch_->search->matchedLength); 298 if (U_FAILURE(status)) { 299 return NULL; 300 } 301 return result; 302 } 303 304 // protected method ------------------------------------------------- 305 306 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) 307 { 308 // values passed here are already in the pre-shift position 309 if (U_SUCCESS(status)) { 310 if (m_strsrch_->pattern.CELength == 0) { 311 m_search_->matchedIndex = 312 m_search_->matchedIndex == USEARCH_DONE ? 313 getOffset() : m_search_->matchedIndex + 1; 314 m_search_->matchedLength = 0; 315 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 316 &status); 317 if (m_search_->matchedIndex == m_search_->textLength) { 318 m_search_->matchedIndex = USEARCH_DONE; 319 } 320 } 321 else { 322 // looking at usearch.cpp, this part is shifted out to 323 // StringSearch instead of SearchIterator because m_strsrch_ is 324 // not accessible in SearchIterator 325 #if 0 326 if (position + m_strsrch_->pattern.defaultShiftSize 327 > m_search_->textLength) { 328 setMatchNotFound(); 329 return USEARCH_DONE; 330 } 331 #endif 332 if (m_search_->matchedLength <= 0) { 333 // the flipping direction issue has already been handled 334 // in next() 335 // for boundary check purposes. this will ensure that the 336 // next match will not preceed the current offset 337 // note search->matchedIndex will always be set to something 338 // in the code 339 m_search_->matchedIndex = position - 1; 340 } 341 342 ucol_setOffset(m_strsrch_->textIter, position, &status); 343 344 #if 0 345 for (;;) { 346 if (m_search_->isCanonicalMatch) { 347 // can't use exact here since extra accents are allowed. 348 usearch_handleNextCanonical(m_strsrch_, &status); 349 } 350 else { 351 usearch_handleNextExact(m_strsrch_, &status); 352 } 353 if (U_FAILURE(status)) { 354 return USEARCH_DONE; 355 } 356 if (m_breakiterator_ == NULL 357 #if !UCONFIG_NO_BREAK_ITERATION 358 || 359 m_search_->matchedIndex == USEARCH_DONE || 360 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 361 m_breakiterator_->isBoundary(m_search_->matchedIndex + 362 m_search_->matchedLength)) 363 #endif 364 ) { 365 if (m_search_->matchedIndex == USEARCH_DONE) { 366 ucol_setOffset(m_strsrch_->textIter, 367 m_search_->textLength, &status); 368 } 369 else { 370 ucol_setOffset(m_strsrch_->textIter, 371 m_search_->matchedIndex, &status); 372 } 373 return m_search_->matchedIndex; 374 } 375 } 376 #else 377 // if m_strsrch_->breakIter is always the same as m_breakiterator_ 378 // then we don't need to check the match boundaries here because 379 // usearch_handleNextXXX will already have done it. 380 if (m_search_->isCanonicalMatch) { 381 // *could* actually use exact here 'cause no extra accents allowed... 382 usearch_handleNextCanonical(m_strsrch_, &status); 383 } else { 384 usearch_handleNextExact(m_strsrch_, &status); 385 } 386 387 if (U_FAILURE(status)) { 388 return USEARCH_DONE; 389 } 390 391 if (m_search_->matchedIndex == USEARCH_DONE) { 392 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); 393 } else { 394 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); 395 } 396 397 return m_search_->matchedIndex; 398 #endif 399 } 400 } 401 return USEARCH_DONE; 402 } 403 404 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) 405 { 406 // values passed here are already in the pre-shift position 407 if (U_SUCCESS(status)) { 408 if (m_strsrch_->pattern.CELength == 0) { 409 m_search_->matchedIndex = 410 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 411 m_search_->matchedIndex); 412 if (m_search_->matchedIndex == 0) { 413 setMatchNotFound(); 414 } 415 else { 416 m_search_->matchedIndex --; 417 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 418 &status); 419 m_search_->matchedLength = 0; 420 } 421 } 422 else { 423 // looking at usearch.cpp, this part is shifted out to 424 // StringSearch instead of SearchIterator because m_strsrch_ is 425 // not accessible in SearchIterator 426 #if 0 427 if (!m_search_->isOverlap && 428 position - m_strsrch_->pattern.defaultShiftSize < 0) { 429 setMatchNotFound(); 430 return USEARCH_DONE; 431 } 432 433 for (;;) { 434 if (m_search_->isCanonicalMatch) { 435 // can't use exact here since extra accents are allowed. 436 usearch_handlePreviousCanonical(m_strsrch_, &status); 437 } 438 else { 439 usearch_handlePreviousExact(m_strsrch_, &status); 440 } 441 if (U_FAILURE(status)) { 442 return USEARCH_DONE; 443 } 444 if (m_breakiterator_ == NULL 445 #if !UCONFIG_NO_BREAK_ITERATION 446 || 447 m_search_->matchedIndex == USEARCH_DONE || 448 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 449 m_breakiterator_->isBoundary(m_search_->matchedIndex + 450 m_search_->matchedLength)) 451 #endif 452 ) { 453 return m_search_->matchedIndex; 454 } 455 } 456 #else 457 ucol_setOffset(m_strsrch_->textIter, position, &status); 458 459 if (m_search_->isCanonicalMatch) { 460 // *could* use exact match here since extra accents *not* allowed! 461 usearch_handlePreviousCanonical(m_strsrch_, &status); 462 } else { 463 usearch_handlePreviousExact(m_strsrch_, &status); 464 } 465 466 if (U_FAILURE(status)) { 467 return USEARCH_DONE; 468 } 469 470 return m_search_->matchedIndex; 471 #endif 472 } 473 474 return m_search_->matchedIndex; 475 } 476 return USEARCH_DONE; 477 } 478 479 U_NAMESPACE_END 480 481 #endif /* #if !UCONFIG_NO_COLLATION */ 482