1 /* 2 ********************************************************************** 3 * Copyright (C) 2001-2008 IBM and others. All rights reserved. 4 ********************************************************************** 5 * Date Name Description 6 * 03/22/2000 helena Creation. 7 ********************************************************************** 8 */ 9 10 #include "unicode/utypes.h" 11 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 13 14 #include "unicode/stsearch.h" 15 #include "usrchimp.h" 16 #include "cmemory.h" 17 18 U_NAMESPACE_BEGIN 19 20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) 21 22 // public constructors and destructors ----------------------------------- 23 24 StringSearch::StringSearch(const UnicodeString &pattern, 25 const UnicodeString &text, 26 const Locale &locale, 27 BreakIterator *breakiter, 28 UErrorCode &status) : 29 SearchIterator(text, breakiter), 30 m_collator_(), 31 m_pattern_(pattern) 32 { 33 if (U_FAILURE(status)) { 34 m_strsrch_ = NULL; 35 return; 36 } 37 38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 39 m_text_.getBuffer(), m_text_.length(), 40 locale.getName(), (UBreakIterator *)breakiter, 41 &status); 42 uprv_free(m_search_); 43 m_search_ = NULL; 44 45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing 46 // wrapper around the internal collator and rules, which (here) are 47 // owned by this stringsearch object. this means 1) it's destructor 48 // _should not_ delete the ucollator or rules, and 2) changes made 49 // to the exposed collator (setStrength etc) _should_ modify the 50 // ucollator. thus the collator is not a copy-on-write alias, and it 51 // needs to distinguish itself not merely from 'stand alone' colators 52 // but also from copy-on-write ones. it needs additional state, which 53 // setUCollator should set. 54 55 if (U_SUCCESS(status)) { 56 // Alias the collator 57 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 58 // m_search_ has been created by the base SearchIterator class 59 m_search_ = m_strsrch_->search; 60 } 61 } 62 63 StringSearch::StringSearch(const UnicodeString &pattern, 64 const UnicodeString &text, 65 RuleBasedCollator *coll, 66 BreakIterator *breakiter, 67 UErrorCode &status) : 68 SearchIterator(text, breakiter), 69 m_collator_(), 70 m_pattern_(pattern) 71 { 72 if (U_FAILURE(status)) { 73 m_strsrch_ = NULL; 74 return; 75 } 76 if (coll == NULL) { 77 status = U_ILLEGAL_ARGUMENT_ERROR; 78 m_strsrch_ = NULL; 79 return; 80 } 81 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 82 m_pattern_.length(), 83 m_text_.getBuffer(), 84 m_text_.length(), coll->ucollator, 85 (UBreakIterator *)breakiter, 86 &status); 87 uprv_free(m_search_); 88 m_search_ = NULL; 89 90 if (U_SUCCESS(status)) { 91 // Alias the collator 92 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 93 // m_search_ has been created by the base SearchIterator class 94 m_search_ = m_strsrch_->search; 95 } 96 } 97 98 StringSearch::StringSearch(const UnicodeString &pattern, 99 CharacterIterator &text, 100 const Locale &locale, 101 BreakIterator *breakiter, 102 UErrorCode &status) : 103 SearchIterator(text, breakiter), 104 m_collator_(), 105 m_pattern_(pattern) 106 { 107 if (U_FAILURE(status)) { 108 m_strsrch_ = NULL; 109 return; 110 } 111 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 112 m_text_.getBuffer(), m_text_.length(), 113 locale.getName(), (UBreakIterator *)breakiter, 114 &status); 115 uprv_free(m_search_); 116 m_search_ = NULL; 117 118 if (U_SUCCESS(status)) { 119 // Alias the collator 120 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 121 // m_search_ has been created by the base SearchIterator class 122 m_search_ = m_strsrch_->search; 123 } 124 } 125 126 StringSearch::StringSearch(const UnicodeString &pattern, 127 CharacterIterator &text, 128 RuleBasedCollator *coll, 129 BreakIterator *breakiter, 130 UErrorCode &status) : 131 SearchIterator(text, breakiter), 132 m_collator_(), 133 m_pattern_(pattern) 134 { 135 if (U_FAILURE(status)) { 136 m_strsrch_ = NULL; 137 return; 138 } 139 if (coll == NULL) { 140 status = U_ILLEGAL_ARGUMENT_ERROR; 141 m_strsrch_ = NULL; 142 return; 143 } 144 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 145 m_pattern_.length(), 146 m_text_.getBuffer(), 147 m_text_.length(), coll->ucollator, 148 (UBreakIterator *)breakiter, 149 &status); 150 uprv_free(m_search_); 151 m_search_ = NULL; 152 153 if (U_SUCCESS(status)) { 154 // Alias the collator 155 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 156 // m_search_ has been created by the base SearchIterator class 157 m_search_ = m_strsrch_->search; 158 } 159 } 160 161 StringSearch::StringSearch(const StringSearch &that) : 162 SearchIterator(that.m_text_, that.m_breakiterator_), 163 m_collator_(), 164 m_pattern_(that.m_pattern_) 165 { 166 UErrorCode status = U_ZERO_ERROR; 167 168 // Free m_search_ from the superclass 169 uprv_free(m_search_); 170 m_search_ = NULL; 171 172 if (that.m_strsrch_ == NULL) { 173 // This was not a good copy 174 m_strsrch_ = NULL; 175 } 176 else { 177 // Make a deep copy 178 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 179 m_pattern_.length(), 180 m_text_.getBuffer(), 181 m_text_.length(), 182 that.m_strsrch_->collator, 183 (UBreakIterator *)that.m_breakiterator_, 184 &status); 185 if (U_SUCCESS(status)) { 186 // Alias the collator 187 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 188 // m_search_ has been created by the base SearchIterator class 189 m_search_ = m_strsrch_->search; 190 } 191 } 192 } 193 194 StringSearch::~StringSearch() 195 { 196 if (m_strsrch_ != NULL) { 197 usearch_close(m_strsrch_); 198 m_search_ = NULL; 199 } 200 } 201 202 StringSearch * 203 StringSearch::clone() const { 204 return new StringSearch(*this); 205 } 206 207 // operator overloading --------------------------------------------- 208 StringSearch & StringSearch::operator=(const StringSearch &that) 209 { 210 if ((*this) != that) { 211 UErrorCode status = U_ZERO_ERROR; 212 m_text_ = that.m_text_; 213 m_breakiterator_ = that.m_breakiterator_; 214 m_pattern_ = that.m_pattern_; 215 // all m_search_ in the parent class is linked up with m_strsrch_ 216 usearch_close(m_strsrch_); 217 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 218 m_pattern_.length(), 219 m_text_.getBuffer(), 220 m_text_.length(), 221 that.m_strsrch_->collator, 222 NULL, &status); 223 // Check null pointer 224 if (m_strsrch_ != NULL) { 225 // Alias the collator 226 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 227 m_search_ = m_strsrch_->search; 228 } 229 } 230 return *this; 231 } 232 233 UBool StringSearch::operator==(const SearchIterator &that) const 234 { 235 if (this == &that) { 236 return TRUE; 237 } 238 if (SearchIterator::operator ==(that)) { 239 StringSearch &thatsrch = (StringSearch &)that; 240 return (this->m_pattern_ == thatsrch.m_pattern_ && 241 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); 242 } 243 return FALSE; 244 } 245 246 // public get and set methods ---------------------------------------- 247 248 void StringSearch::setOffset(int32_t position, UErrorCode &status) 249 { 250 // status checked in usearch_setOffset 251 usearch_setOffset(m_strsrch_, position, &status); 252 } 253 254 int32_t StringSearch::getOffset(void) const 255 { 256 return usearch_getOffset(m_strsrch_); 257 } 258 259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status) 260 { 261 if (U_SUCCESS(status)) { 262 m_text_ = text; 263 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); 264 } 265 } 266 267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status) 268 { 269 if (U_SUCCESS(status)) { 270 text.getText(m_text_); 271 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); 272 } 273 } 274 275 RuleBasedCollator * StringSearch::getCollator() const 276 { 277 return (RuleBasedCollator *)&m_collator_; 278 } 279 280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) 281 { 282 if (U_SUCCESS(status)) { 283 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); 284 // Alias the collator 285 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); 286 } 287 } 288 289 void StringSearch::setPattern(const UnicodeString &pattern, 290 UErrorCode &status) 291 { 292 if (U_SUCCESS(status)) { 293 m_pattern_ = pattern; 294 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), 295 &status); 296 } 297 } 298 299 const UnicodeString & StringSearch::getPattern() const 300 { 301 return m_pattern_; 302 } 303 304 // public methods ---------------------------------------------------- 305 306 void StringSearch::reset() 307 { 308 usearch_reset(m_strsrch_); 309 } 310 311 SearchIterator * StringSearch::safeClone(void) const 312 { 313 UErrorCode status = U_ZERO_ERROR; 314 StringSearch *result = new StringSearch(m_pattern_, m_text_, 315 (RuleBasedCollator *)&m_collator_, 316 m_breakiterator_, 317 status); 318 /* test for NULL */ 319 if (result == 0) { 320 status = U_MEMORY_ALLOCATION_ERROR; 321 return 0; 322 } 323 result->setOffset(getOffset(), status); 324 result->setMatchStart(m_strsrch_->search->matchedIndex); 325 result->setMatchLength(m_strsrch_->search->matchedLength); 326 if (U_FAILURE(status)) { 327 return NULL; 328 } 329 return result; 330 } 331 332 // protected method ------------------------------------------------- 333 334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) 335 { 336 // values passed here are already in the pre-shift position 337 if (U_SUCCESS(status)) { 338 if (m_strsrch_->pattern.CELength == 0) { 339 m_search_->matchedIndex = 340 m_search_->matchedIndex == USEARCH_DONE ? 341 getOffset() : m_search_->matchedIndex + 1; 342 m_search_->matchedLength = 0; 343 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 344 &status); 345 if (m_search_->matchedIndex == m_search_->textLength) { 346 m_search_->matchedIndex = USEARCH_DONE; 347 } 348 } 349 else { 350 // looking at usearch.cpp, this part is shifted out to 351 // StringSearch instead of SearchIterator because m_strsrch_ is 352 // not accessible in SearchIterator 353 #if 0 354 if (position + m_strsrch_->pattern.defaultShiftSize 355 > m_search_->textLength) { 356 setMatchNotFound(); 357 return USEARCH_DONE; 358 } 359 #endif 360 if (m_search_->matchedLength <= 0) { 361 // the flipping direction issue has already been handled 362 // in next() 363 // for boundary check purposes. this will ensure that the 364 // next match will not preceed the current offset 365 // note search->matchedIndex will always be set to something 366 // in the code 367 m_search_->matchedIndex = position - 1; 368 } 369 370 ucol_setOffset(m_strsrch_->textIter, position, &status); 371 372 #if 0 373 for (;;) { 374 if (m_search_->isCanonicalMatch) { 375 // can't use exact here since extra accents are allowed. 376 usearch_handleNextCanonical(m_strsrch_, &status); 377 } 378 else { 379 usearch_handleNextExact(m_strsrch_, &status); 380 } 381 if (U_FAILURE(status)) { 382 return USEARCH_DONE; 383 } 384 if (m_breakiterator_ == NULL 385 #if !UCONFIG_NO_BREAK_ITERATION 386 || 387 m_search_->matchedIndex == USEARCH_DONE || 388 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 389 m_breakiterator_->isBoundary(m_search_->matchedIndex + 390 m_search_->matchedLength)) 391 #endif 392 ) { 393 if (m_search_->matchedIndex == USEARCH_DONE) { 394 ucol_setOffset(m_strsrch_->textIter, 395 m_search_->textLength, &status); 396 } 397 else { 398 ucol_setOffset(m_strsrch_->textIter, 399 m_search_->matchedIndex, &status); 400 } 401 return m_search_->matchedIndex; 402 } 403 } 404 #else 405 // if m_strsrch_->breakIter is always the same as m_breakiterator_ 406 // then we don't need to check the match boundaries here because 407 // usearch_handleNextXXX will already have done it. 408 if (m_search_->isCanonicalMatch) { 409 // *could* actually use exact here 'cause no extra accents allowed... 410 usearch_handleNextCanonical(m_strsrch_, &status); 411 } else { 412 usearch_handleNextExact(m_strsrch_, &status); 413 } 414 415 if (U_FAILURE(status)) { 416 return USEARCH_DONE; 417 } 418 419 if (m_search_->matchedIndex == USEARCH_DONE) { 420 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); 421 } else { 422 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); 423 } 424 425 return m_search_->matchedIndex; 426 #endif 427 } 428 } 429 return USEARCH_DONE; 430 } 431 432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) 433 { 434 // values passed here are already in the pre-shift position 435 if (U_SUCCESS(status)) { 436 if (m_strsrch_->pattern.CELength == 0) { 437 m_search_->matchedIndex = 438 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 439 m_search_->matchedIndex); 440 if (m_search_->matchedIndex == 0) { 441 setMatchNotFound(); 442 } 443 else { 444 m_search_->matchedIndex --; 445 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 446 &status); 447 m_search_->matchedLength = 0; 448 } 449 } 450 else { 451 // looking at usearch.cpp, this part is shifted out to 452 // StringSearch instead of SearchIterator because m_strsrch_ is 453 // not accessible in SearchIterator 454 #if 0 455 if (!m_search_->isOverlap && 456 position - m_strsrch_->pattern.defaultShiftSize < 0) { 457 setMatchNotFound(); 458 return USEARCH_DONE; 459 } 460 461 for (;;) { 462 if (m_search_->isCanonicalMatch) { 463 // can't use exact here since extra accents are allowed. 464 usearch_handlePreviousCanonical(m_strsrch_, &status); 465 } 466 else { 467 usearch_handlePreviousExact(m_strsrch_, &status); 468 } 469 if (U_FAILURE(status)) { 470 return USEARCH_DONE; 471 } 472 if (m_breakiterator_ == NULL 473 #if !UCONFIG_NO_BREAK_ITERATION 474 || 475 m_search_->matchedIndex == USEARCH_DONE || 476 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 477 m_breakiterator_->isBoundary(m_search_->matchedIndex + 478 m_search_->matchedLength)) 479 #endif 480 ) { 481 return m_search_->matchedIndex; 482 } 483 } 484 #else 485 ucol_setOffset(m_strsrch_->textIter, position, &status); 486 487 if (m_search_->isCanonicalMatch) { 488 // *could* use exact match here since extra accents *not* allowed! 489 usearch_handlePreviousCanonical(m_strsrch_, &status); 490 } else { 491 usearch_handlePreviousExact(m_strsrch_, &status); 492 } 493 494 if (U_FAILURE(status)) { 495 return USEARCH_DONE; 496 } 497 498 return m_search_->matchedIndex; 499 #endif 500 } 501 502 return m_search_->matchedIndex; 503 } 504 return USEARCH_DONE; 505 } 506 507 U_NAMESPACE_END 508 509 #endif /* #if !UCONFIG_NO_COLLATION */ 510