1 /* 2 ****************************************************************************** 3 * Copyright (C) 1996-2011, International Business Machines * 4 * Corporation and others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 8 #include "unicode/utypes.h" 9 10 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 11 12 #include "unicode/unistr.h" 13 #include "unicode/putil.h" 14 #include "unicode/usearch.h" 15 16 #include "cmemory.h" 17 #include "unicode/coll.h" 18 #include "unicode/tblcoll.h" 19 #include "unicode/coleitr.h" 20 #include "unicode/ucoleitr.h" 21 22 #include "unicode/regex.h" // TODO: make conditional on regexp being built. 23 24 #include "unicode/uniset.h" 25 #include "unicode/uset.h" 26 #include "unicode/ustring.h" 27 #include "hash.h" 28 #include "uhash.h" 29 #include "ucol_imp.h" 30 #include "normalizer2impl.h" 31 32 #include "unicode/colldata.h" 33 #include "unicode/bmsearch.h" 34 35 U_NAMESPACE_BEGIN 36 37 #define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) 38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 39 #define DELETE_ARRAY(array) uprv_free((void *) (array)) 40 41 42 struct CEI 43 { 44 uint32_t order; 45 int32_t lowOffset; 46 int32_t highOffset; 47 }; 48 49 class Target : public UMemory 50 { 51 public: 52 Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status); 53 ~Target(); 54 55 void setTargetString(const UnicodeString *target); 56 57 const CEI *nextCE(int32_t offset); 58 const CEI *prevCE(int32_t offset); 59 60 int32_t stringLength(); 61 UChar charAt(int32_t offset); 62 63 UBool isBreakBoundary(int32_t offset); 64 int32_t nextBreakBoundary(int32_t offset); 65 int32_t nextSafeBoundary(int32_t offset); 66 67 UBool isIdentical(UnicodeString &pattern, int32_t start, int32_t end); 68 69 void setOffset(int32_t offset); 70 void setLast(int32_t last); 71 int32_t getOffset(); 72 73 private: 74 CEI *ceb; 75 int32_t bufferSize; 76 int32_t bufferMin; 77 int32_t bufferMax; 78 79 uint32_t strengthMask; 80 UCollationStrength strength; 81 uint32_t variableTop; 82 UBool toShift; 83 UCollator *coll; 84 const Normalizer2 &nfd; 85 86 const UnicodeString *targetString; 87 const UChar *targetBuffer; 88 int32_t targetLength; 89 90 UCollationElements *elements; 91 UBreakIterator *charBreakIterator; 92 }; 93 94 Target::Target(UCollator *theCollator, const UnicodeString *target, int32_t patternLength, UErrorCode &status) 95 : bufferSize(0), bufferMin(0), bufferMax(0), 96 strengthMask(0), strength(UCOL_PRIMARY), variableTop(0), toShift(FALSE), coll(theCollator), 97 nfd(*Normalizer2Factory::getNFDInstance(status)), 98 targetString(NULL), targetBuffer(NULL), targetLength(0), elements(NULL), charBreakIterator(NULL) 99 { 100 strength = ucol_getStrength(coll); 101 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; 102 variableTop = ucol_getVariableTop(coll, &status); 103 104 // find the largest expansion 105 uint8_t maxExpansion = 0; 106 for (const uint8_t *expansion = coll->expansionCESize; *expansion != 0; expansion += 1) { 107 if (*expansion > maxExpansion) { 108 maxExpansion = *expansion; 109 } 110 } 111 112 // room for an extra character on each end, plus 4 for safety 113 bufferSize = patternLength + (2 * maxExpansion) + 4; 114 115 ceb = NEW_ARRAY(CEI, bufferSize); 116 117 if (ceb == NULL) { 118 status = U_MEMORY_ALLOCATION_ERROR; 119 return; 120 } 121 122 if (target != NULL) { 123 setTargetString(target); 124 } 125 126 switch (strength) 127 { 128 default: 129 strengthMask |= UCOL_TERTIARYORDERMASK; 130 /* fall through */ 131 132 case UCOL_SECONDARY: 133 strengthMask |= UCOL_SECONDARYORDERMASK; 134 /* fall through */ 135 136 case UCOL_PRIMARY: 137 strengthMask |= UCOL_PRIMARYORDERMASK; 138 } 139 } 140 141 Target::~Target() 142 { 143 ubrk_close(charBreakIterator); 144 ucol_closeElements(elements); 145 146 DELETE_ARRAY(ceb); 147 } 148 149 void Target::setTargetString(const UnicodeString *target) 150 { 151 if (charBreakIterator != NULL) { 152 ubrk_close(charBreakIterator); 153 ucol_closeElements(elements); 154 } 155 156 targetString = target; 157 158 if (targetString != NULL) { 159 UErrorCode status = U_ZERO_ERROR; 160 161 targetBuffer = targetString->getBuffer(); 162 targetLength = targetString->length(); 163 164 elements = ucol_openElements(coll, target->getBuffer(), target->length(), &status); 165 ucol_forceHanImplicit(elements, &status); 166 167 charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status), 168 targetBuffer, targetLength, &status); 169 } else { 170 targetBuffer = NULL; 171 targetLength = 0; 172 } 173 } 174 175 const CEI *Target::nextCE(int32_t offset) 176 { 177 UErrorCode status = U_ZERO_ERROR; 178 int32_t low = -1, high = -1; 179 uint32_t order; 180 UBool cont = FALSE; 181 182 if (offset >= bufferMin && offset < bufferMax) { 183 return &ceb[offset]; 184 } 185 186 if (bufferMax >= bufferSize || offset != bufferMax) { 187 return NULL; 188 } 189 190 do { 191 low = ucol_getOffset(elements); 192 order = ucol_next(elements, &status); 193 high = ucol_getOffset(elements); 194 195 if (order == (uint32_t)UCOL_NULLORDER) { 196 //high = low = -1; 197 break; 198 } 199 200 cont = isContinuation(order); 201 order &= strengthMask; 202 203 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { 204 if (strength >= UCOL_QUATERNARY) { 205 order &= UCOL_PRIMARYORDERMASK; 206 } else { 207 order = UCOL_IGNORABLE; 208 } 209 } 210 } while (order == UCOL_IGNORABLE); 211 212 if (cont) { 213 order |= UCOL_CONTINUATION_MARKER; 214 } 215 216 ceb[offset].order = order; 217 ceb[offset].lowOffset = low; 218 ceb[offset].highOffset = high; 219 220 bufferMax += 1; 221 222 return &ceb[offset]; 223 } 224 225 const CEI *Target::prevCE(int32_t offset) 226 { 227 UErrorCode status = U_ZERO_ERROR; 228 int32_t low = -1, high = -1; 229 uint32_t order; 230 UBool cont = FALSE; 231 232 if (offset >= bufferMin && offset < bufferMax) { 233 return &ceb[offset]; 234 } 235 236 if (bufferMax >= bufferSize || offset != bufferMax) { 237 return NULL; 238 } 239 240 do { 241 high = ucol_getOffset(elements); 242 order = ucol_previous(elements, &status); 243 low = ucol_getOffset(elements); 244 245 if (order == (uint32_t)UCOL_NULLORDER) { 246 break; 247 } 248 249 cont = isContinuation(order); 250 order &= strengthMask; 251 252 if (toShift && variableTop > order && (order & UCOL_PRIMARYORDERMASK) != 0) { 253 if (strength >= UCOL_QUATERNARY) { 254 order &= UCOL_PRIMARYORDERMASK; 255 } else { 256 order = UCOL_IGNORABLE; 257 } 258 } 259 } while (order == UCOL_IGNORABLE); 260 261 bufferMax += 1; 262 263 if (cont) { 264 order |= UCOL_CONTINUATION_MARKER; 265 } 266 267 ceb[offset].order = order; 268 ceb[offset].lowOffset = low; 269 ceb[offset].highOffset = high; 270 271 return &ceb[offset]; 272 } 273 274 int32_t Target::stringLength() 275 { 276 if (targetString != NULL) { 277 return targetLength; 278 } 279 280 return 0; 281 } 282 283 UChar Target::charAt(int32_t offset) 284 { 285 if (targetString != NULL) { 286 return targetBuffer[offset]; 287 } 288 289 return 0x0000; 290 } 291 292 void Target::setOffset(int32_t offset) 293 { 294 UErrorCode status = U_ZERO_ERROR; 295 296 bufferMin = 0; 297 bufferMax = 0; 298 299 ucol_setOffset(elements, offset, &status); 300 } 301 302 void Target::setLast(int32_t last) 303 { 304 UErrorCode status = U_ZERO_ERROR; 305 306 bufferMin = 0; 307 bufferMax = 1; 308 309 ceb[0].order = UCOL_NULLORDER; 310 ceb[0].lowOffset = last; 311 ceb[0].highOffset = last; 312 313 ucol_setOffset(elements, last, &status); 314 } 315 316 int32_t Target::getOffset() 317 { 318 return ucol_getOffset(elements); 319 } 320 321 UBool Target::isBreakBoundary(int32_t offset) 322 { 323 return ubrk_isBoundary(charBreakIterator, offset); 324 } 325 326 int32_t Target::nextBreakBoundary(int32_t offset) 327 { 328 return ubrk_following(charBreakIterator, offset); 329 } 330 331 int32_t Target::nextSafeBoundary(int32_t offset) 332 { 333 while (offset < targetLength) { 334 //UChar ch = charAt(offset); 335 UChar ch = targetBuffer[offset]; 336 337 if (U_IS_LEAD(ch) || ! ucol_unsafeCP(ch, coll)) { 338 return offset; 339 } 340 341 offset += 1; 342 } 343 344 return targetLength; 345 } 346 347 UBool Target::isIdentical(UnicodeString &pattern, int32_t start, int32_t end) 348 { 349 if (strength < UCOL_IDENTICAL) { 350 return TRUE; 351 } 352 353 // Note: We could use Normalizer::compare() or similar, but for short strings 354 // which may not be in FCD it might be faster to just NFD them. 355 UErrorCode status = U_ZERO_ERROR; 356 UnicodeString t2, p2; 357 nfd.normalize(UnicodeString(FALSE, targetBuffer + start, end - start), t2, status); 358 nfd.normalize(pattern, p2, status); 359 // return FALSE if NFD failed 360 return U_SUCCESS(status) && t2 == p2; 361 } 362 363 #define HASH_TABLE_SIZE 257 364 365 class BadCharacterTable : public UMemory 366 { 367 public: 368 BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status); 369 ~BadCharacterTable(); 370 371 int32_t operator[](uint32_t ce) const; 372 int32_t getMaxSkip() const; 373 int32_t minLengthInChars(int32_t index); 374 375 private: 376 static int32_t hash(uint32_t ce); 377 378 int32_t maxSkip; 379 int32_t badCharacterTable[HASH_TABLE_SIZE]; 380 381 int32_t *minLengthCache; 382 }; 383 384 BadCharacterTable::BadCharacterTable(CEList &patternCEs, CollData *data, UErrorCode &status) 385 : minLengthCache(NULL) 386 { 387 int32_t plen = patternCEs.size(); 388 389 // **** need a better way to deal with this **** 390 if (U_FAILURE(status) || plen == 0) { 391 return; 392 } 393 394 int32_t *history = NEW_ARRAY(int32_t, plen); 395 396 if (history == NULL) { 397 status = U_MEMORY_ALLOCATION_ERROR; 398 return; 399 } 400 401 for (int32_t i = 0; i < plen; i += 1) { 402 history[i] = -1; 403 } 404 405 minLengthCache = NEW_ARRAY(int32_t, plen + 1); 406 407 if (minLengthCache == NULL) { 408 DELETE_ARRAY(history); 409 status = U_MEMORY_ALLOCATION_ERROR; 410 return; 411 } 412 413 maxSkip = minLengthCache[0] = data->minLengthInChars(&patternCEs, 0, history); 414 415 for(int32_t j = 0; j < HASH_TABLE_SIZE; j += 1) { 416 badCharacterTable[j] = maxSkip; 417 } 418 419 for(int32_t p = 1; p < plen; p += 1) { 420 minLengthCache[p] = data->minLengthInChars(&patternCEs, p, history); 421 422 // Make sure this entry is not bigger than the previous one. 423 // Otherwise, we might skip too far in some cases. 424 if (minLengthCache[p] < 0 || minLengthCache[p] > minLengthCache[p - 1]) { 425 minLengthCache[p] = minLengthCache[p - 1]; 426 } 427 } 428 429 minLengthCache[plen] = 0; 430 431 for(int32_t p = 0; p < plen - 1; p += 1) { 432 badCharacterTable[hash(patternCEs[p])] = minLengthCache[p + 1]; 433 } 434 435 DELETE_ARRAY(history); 436 } 437 438 BadCharacterTable::~BadCharacterTable() 439 { 440 DELETE_ARRAY(minLengthCache); 441 } 442 443 int32_t BadCharacterTable::operator[](uint32_t ce) const 444 { 445 return badCharacterTable[hash(ce)]; 446 } 447 448 int32_t BadCharacterTable::getMaxSkip() const 449 { 450 return maxSkip; 451 } 452 453 int32_t BadCharacterTable::minLengthInChars(int32_t index) 454 { 455 return minLengthCache[index]; 456 } 457 458 int32_t BadCharacterTable::hash(uint32_t ce) 459 { 460 return UCOL_PRIMARYORDER(ce) % HASH_TABLE_SIZE; 461 } 462 463 class GoodSuffixTable : public UMemory 464 { 465 public: 466 GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status); 467 ~GoodSuffixTable(); 468 469 int32_t operator[](int32_t offset) const; 470 471 private: 472 int32_t *goodSuffixTable; 473 }; 474 475 GoodSuffixTable::GoodSuffixTable(CEList &patternCEs, BadCharacterTable &badCharacterTable, UErrorCode &status) 476 : goodSuffixTable(NULL) 477 { 478 int32_t patlen = patternCEs.size(); 479 480 // **** need a better way to deal with this **** 481 if (U_FAILURE(status) || patlen <= 0) { 482 return; 483 } 484 485 int32_t *suff = NEW_ARRAY(int32_t, patlen); 486 int32_t start = patlen - 1, end = - 1; 487 int32_t maxSkip = badCharacterTable.getMaxSkip(); 488 489 if (suff == NULL) { 490 status = U_MEMORY_ALLOCATION_ERROR; 491 return; 492 } 493 494 // initialze suff 495 suff[patlen - 1] = patlen; 496 497 for (int32_t i = patlen - 2; i >= 0; i -= 1) { 498 // (i > start) means we're inside the last suffix match we found 499 // ((patlen - 1) - end) is how far the end of that match is from end of pattern 500 // (i - start) is how far we are from start of that match 501 // (i + (patlen - 1) - end) is index of same character at end of pattern 502 // so if any suffix match at that character doesn't extend beyond the last match, 503 // it's the suffix for this character as well 504 if (i > start && suff[i + patlen - 1 - end] < i - start) { 505 suff[i] = suff[i + patlen - 1 - end]; 506 } else { 507 start = end = i; 508 509 int32_t s = patlen; 510 511 while (start >= 0 && patternCEs[start] == patternCEs[--s]) { 512 start -= 1; 513 } 514 515 suff[i] = end - start; 516 } 517 } 518 519 // now build goodSuffixTable 520 goodSuffixTable = NEW_ARRAY(int32_t, patlen); 521 522 if (goodSuffixTable == NULL) { 523 DELETE_ARRAY(suff); 524 status = U_MEMORY_ALLOCATION_ERROR; 525 return; 526 } 527 528 529 // initialize entries to minLengthInChars of the pattern 530 for (int32_t i = 0; i < patlen; i += 1) { 531 goodSuffixTable[i] = maxSkip; 532 } 533 534 int32_t prefix = 0; 535 536 for (int32_t i = patlen - /*1*/ 2; i >= 0; i -= 1) { 537 if (suff[i] == i + 1) { 538 // this matching suffix is a prefix of the pattern 539 int32_t prefixSkip = badCharacterTable.minLengthInChars(i + 1); 540 541 // for any mis-match before this suffix, we should skip 542 // so that the front of the pattern (i.e. the prefix) 543 // lines up with the front of the suffix. 544 // (patlen - 1 - i) is the start of the suffix 545 while (prefix < patlen - 1 - i) { 546 // value of maxSkip means never set... 547 if (goodSuffixTable[prefix] == maxSkip) { 548 goodSuffixTable[prefix] = prefixSkip; 549 } 550 551 prefix += 1; 552 } 553 } 554 } 555 556 for (int32_t i = 0; i < patlen - 1; i += 1) { 557 goodSuffixTable[patlen - 1 - suff[i]] = badCharacterTable.minLengthInChars(i + 1); 558 } 559 560 DELETE_ARRAY(suff); 561 } 562 563 GoodSuffixTable::~GoodSuffixTable() 564 { 565 DELETE_ARRAY(goodSuffixTable); 566 } 567 568 int32_t GoodSuffixTable::operator[](int32_t offset) const 569 { 570 return goodSuffixTable[offset]; 571 } 572 573 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BoyerMooreSearch) 574 575 576 UBool BoyerMooreSearch::empty() 577 { 578 return patCEs->size() <= 0; 579 } 580 581 CollData *BoyerMooreSearch::getData() 582 { 583 return data; 584 } 585 586 CEList *BoyerMooreSearch::getPatternCEs() 587 { 588 return patCEs; 589 } 590 591 BadCharacterTable *BoyerMooreSearch::getBadCharacterTable() 592 { 593 return badCharacterTable; 594 } 595 596 GoodSuffixTable *BoyerMooreSearch::getGoodSuffixTable() 597 { 598 return goodSuffixTable; 599 } 600 601 BoyerMooreSearch::BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, 602 UErrorCode &status) 603 : data(theData), patCEs(NULL), badCharacterTable(NULL), goodSuffixTable(NULL), pattern(patternString), target(NULL) 604 { 605 606 if (U_FAILURE(status)) { 607 return; 608 } 609 610 UCollator *collator = data->getCollator(); 611 612 patCEs = new CEList(collator, patternString, status); 613 614 if (patCEs == NULL || U_FAILURE(status)) { 615 return; 616 } 617 618 badCharacterTable = new BadCharacterTable(*patCEs, data, status); 619 620 if (badCharacterTable == NULL || U_FAILURE(status)) { 621 return; 622 } 623 624 goodSuffixTable = new GoodSuffixTable(*patCEs, *badCharacterTable, status); 625 626 if (targetString != NULL) { 627 target = new Target(collator, targetString, patCEs->size(), status); 628 } 629 } 630 631 BoyerMooreSearch::~BoyerMooreSearch() 632 { 633 delete target; 634 delete goodSuffixTable; 635 delete badCharacterTable; 636 delete patCEs; 637 } 638 639 void BoyerMooreSearch::setTargetString(const UnicodeString *targetString, UErrorCode &status) 640 { 641 if (U_FAILURE(status)) { 642 return; 643 } 644 645 if (target == NULL) { 646 target = new Target(data->getCollator(), targetString, patCEs->size(), status); 647 } else { 648 target->setTargetString(targetString); 649 } 650 } 651 652 // **** main flow of this code from Laura Werner's "Unicode Text Searching in Java" paper. **** 653 /* 654 * TODO: 655 * * deal with trailing (and leading?) ignorables. 656 * * Adding BoyerMooreSearch object slowed it down. How can we speed it up? 657 */ 658 UBool BoyerMooreSearch::search(int32_t offset, int32_t &start, int32_t &end) 659 { 660 /*UCollator *coll =*/ data->getCollator(); 661 int32_t plen = patCEs->size(); 662 int32_t tlen = target->stringLength(); 663 int32_t maxSkip = badCharacterTable->getMaxSkip(); 664 int32_t tOffset = offset + maxSkip; 665 666 if (plen <= 0) { 667 // Searching for a zero length pattern always fails. 668 start = end = -1; 669 return FALSE; 670 } 671 672 while (tOffset <= tlen) { 673 int32_t pIndex = plen - 1; 674 int32_t tIndex = 0; 675 int32_t lIndex = 0; 676 677 if (tOffset < tlen) { 678 // **** we really want to skip ahead enough to **** 679 // **** be sure we get at least 1 non-ignorable **** 680 // **** CE after the end of the pattern. **** 681 int32_t next = target->nextSafeBoundary(tOffset + 1); 682 683 target->setOffset(next); 684 685 for (lIndex = 0; ; lIndex += 1) { 686 const CEI *cei = target->prevCE(lIndex); 687 int32_t low = cei->lowOffset; 688 int32_t high = cei->highOffset; 689 690 if (high == 0 || (low < high && low <= tOffset)) { 691 if (low < tOffset) { 692 while (lIndex >= 0 && target->prevCE(lIndex)->highOffset == high) { 693 lIndex -= 1; 694 } 695 696 if (high > tOffset) { 697 tOffset = high; 698 } 699 } 700 701 break; 702 } 703 } 704 } else { 705 target->setLast(tOffset); 706 lIndex = 0; 707 } 708 709 tIndex = ++lIndex; 710 711 // Iterate backward until we hit the beginning of the pattern 712 while (pIndex >= 0) { 713 uint32_t pce = (*patCEs)[pIndex]; 714 const CEI *tcei = target->prevCE(tIndex++); 715 716 717 if (tcei->order != pce) { 718 // There is a mismatch at this position. Decide how far 719 // over to shift the pattern, then try again. 720 721 int32_t gsOffset = tOffset + (*goodSuffixTable)[pIndex]; 722 #ifdef EXTRA_CAUTIOUS 723 int32_t old = tOffset; 724 #endif 725 726 tOffset += (*badCharacterTable)[tcei->order] - badCharacterTable->minLengthInChars(pIndex + 1); 727 728 if (gsOffset > tOffset) { 729 tOffset = gsOffset; 730 } 731 732 #ifdef EXTRA_CAUTIOUS 733 // Make sure we don't skip backwards... 734 if (tOffset <= old) { 735 tOffset = old + 1; 736 } 737 #endif 738 739 break; 740 } 741 742 pIndex -= 1; 743 } 744 745 if (pIndex < 0) { 746 // We made it back to the beginning of the pattern, 747 // which means we matched it all. Return the location. 748 const CEI firstCEI = *target->prevCE(tIndex - 1); 749 const CEI lastCEI = *target->prevCE(lIndex); 750 int32_t mStart = firstCEI.lowOffset; 751 int32_t minLimit = lastCEI.lowOffset; 752 int32_t maxLimit = lastCEI.highOffset; 753 int32_t mLimit; 754 UBool found = TRUE; 755 756 target->setOffset(/*tOffset*/maxLimit); 757 758 const CEI nextCEI = *target->nextCE(0); 759 760 if (nextCEI.lowOffset > maxLimit) { 761 maxLimit = nextCEI.lowOffset; 762 } 763 764 if (nextCEI.lowOffset == nextCEI.highOffset && nextCEI.order != (uint32_t)UCOL_NULLORDER) { 765 found = FALSE; 766 } 767 768 if (! target->isBreakBoundary(mStart)) { 769 found = FALSE; 770 } 771 772 if (firstCEI.lowOffset == firstCEI.highOffset) { 773 found = FALSE; 774 } 775 776 mLimit = maxLimit; 777 if (minLimit < maxLimit) { 778 // When the last CE's low index is same with its high index, the CE is likely 779 // a part of expansion. In this case, the index is located just after the 780 // character corresponding to the CEs compared above. If the index is right 781 // at the break boundary, move the position to the next boundary will result 782 // incorrect match length when there are ignorable characters exist between 783 // the position and the next character produces CE(s). See ticket#8482. 784 if (minLimit == lastCEI.highOffset && target->isBreakBoundary(minLimit)) { 785 mLimit = minLimit; 786 } else { 787 int32_t nbb = target->nextBreakBoundary(minLimit); 788 789 if (nbb >= lastCEI.highOffset) { 790 mLimit = nbb; 791 } 792 } 793 } 794 795 if (mLimit > maxLimit) { 796 found = FALSE; 797 } 798 799 if (! target->isBreakBoundary(mLimit)) { 800 found = FALSE; 801 } 802 803 if (! target->isIdentical(pattern, mStart, mLimit)) { 804 found = FALSE; 805 } 806 807 if (found) { 808 start = mStart; 809 end = mLimit; 810 811 return TRUE; 812 } 813 814 tOffset += (*goodSuffixTable)[0]; // really? Maybe += 1 or += maxSkip? 815 } 816 // Otherwise, we're here because of a mismatch, so keep going.... 817 } 818 819 // no match 820 start = -1; 821 end = -1; 822 return FALSE; 823 } 824 825 U_NAMESPACE_END 826 827 #endif // #if !UCONFIG_NO_COLLATION 828