Lines Matching refs:strsrch
148 * @param strsrch string search data
153 inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
158 sourcece &= strsrch->ceMask;
160 if (strsrch->toShift) {
166 if (strsrch->variableTop > sourcece) {
167 if (strsrch->strength >= UCOL_QUATERNARY) {
174 } else if (strsrch->strength >= UCOL_QUATERNARY && sourcece == UCOL_IGNORABLE) {
290 * @param strsrch string search data
296 inline uint16_t initializePatternCETable(UStringSearch *strsrch,
299 UPattern *pattern = &(strsrch->pattern);
303 UCollationElements *coleiter = strsrch->utilIter;
306 coleiter = ucol_openElements(strsrch->collator, pattern->text,
311 strsrch->utilIter = coleiter;
314 uprv_init_collIterate(strsrch->collator, pattern->text,
333 uint32_t newce = getCE(strsrch, ce);
365 * @param strsrch string search data
371 inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
374 UPattern *pattern = &(strsrch->pattern);
378 UCollationElements *coleiter = strsrch->utilIter;
381 coleiter = ucol_openElements(strsrch->collator, pattern->text,
386 strsrch->utilIter = coleiter;
388 uprv_init_collIterate(strsrch->collator, pattern->text,
441 * @param strsrch UStringSearch data storage
447 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
449 UPattern *pattern = &(strsrch->pattern);
455 if (strsrch->strength == UCOL_PRIMARY) {
468 if (strsrch->pattern.PCE != NULL) {
469 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
470 uprv_free(strsrch->pattern.PCE);
473 strsrch->pattern.PCE = NULL;
477 return initializePatternCETable(strsrch, status);
529 * Building of the pattern collation element list and the Boyer-Moore strsrch
552 * @param strsrch UStringSearch data storage
557 inline void initialize(UStringSearch *strsrch, UErrorCode *status)
559 int16_t expandlength = initializePattern(strsrch, status);
560 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) {
561 UPattern *pattern = &strsrch->pattern;
571 strsrch->pattern.defaultShiftSize = 0;
578 * @param strsrch string search data
583 void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/,
587 UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
609 * @param strsrch string search data
614 UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
618 UBreakIterator *breakiterator = strsrch->search->breakIter;
638 UCollationElements *coleiter = strsrch->utilIter;
639 const UChar *text = strsrch->search->text +
643 for (int32_t count = 0; count < strsrch->pattern.CELength;
645 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
650 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) {
656 && getCE(strsrch, nextce) == UCOL_IGNORABLE) {
705 * @param strsrch string search data
712 inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
715 int32_t textlength = strsrch->search->textLength;
716 if (strsrch->pattern.hasSuffixAccents &&
719 const UChar *text = strsrch->search->text;
733 * @param strsrch string search data
741 inline int32_t shiftForward(UStringSearch *strsrch,
746 UPattern *pattern = &(strsrch->pattern);
761 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
774 * @param strsrch string search data
777 inline void setMatchNotFound(UStringSearch *strsrch)
780 strsrch->search->matchedIndex = USEARCH_DONE;
781 strsrch->search->matchedLength = 0;
782 if (strsrch->search->isForwardSearching) {
783 setColEIterOffset(strsrch->textIter, strsrch->search->textLength);
786 setColEIterOffset(strsrch->textIter, 0);
831 * @param strsrch string search data
840 UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
845 if (strsrch->pattern.hasPrefixAccents) {
848 const UChar *text = strsrch->search->text + start;
853 int32_t safeoffset = getNextSafeOffset(strsrch->collator,
883 UCollationElements *coleiter = strsrch->utilIter;
885 uint32_t firstce = strsrch->pattern.CE[0];
923 * @param strsrch string search data
930 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
933 if (strsrch->pattern.hasPrefixAccents) {
934 UCollationElements *coleiter = strsrch->textIter;
938 int32_t firstce = strsrch->pattern.CE[0];
941 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
949 ce = getCE(strsrch, ucol_next(coleiter, &status));
962 // accent = (getFCD(strsrch->search->text, &temp,
963 // strsrch->search->textLength)
968 UBool accent = getFCD(strsrch->search->text, &temp,
969 strsrch->search->textLength) > 0xFF;
971 return checkExtraMatchAccents(strsrch, start, end, &status);
978 U16_BACK_1(strsrch->search->text, 0, temp);
979 if (getFCD(strsrch->search->text, &temp,
980 strsrch->search->textLength) & LAST_BYTE_MASK_) {
1004 * @param strsrch string search data
1011 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
1014 if (strsrch->pattern.hasSuffixAccents) {
1015 const UChar *text = strsrch->search->text;
1017 int32_t textlength = strsrch->search->textLength;
1020 int32_t firstce = strsrch->pattern.CE[0];
1021 UCollationElements *coleiter = strsrch->textIter;
1025 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
1031 while (count < strsrch->pattern.CELength) {
1032 if (getCE(strsrch, ucol_next(coleiter, &status))
1048 ce = getCE(strsrch, ce);
1078 * @param strsrch string search data
1084 inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
1087 if (strsrch->strength != UCOL_IDENTICAL) {
1095 strsrch->nfd->normalize(
1096 UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status);
1097 strsrch->nfd->normalize(
1098 UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
1106 * @param strsrch string search data
1112 inline UBool checkRepeatedMatch(UStringSearch *strsrch,
1116 int32_t lastmatchindex = strsrch->search->matchedIndex;
1121 if (strsrch->search->isForwardSearching) {
1127 if (!result && !strsrch->search->isOverlap) {
1128 if (strsrch->search->isForwardSearching) {
1129 result = start < lastmatchindex + strsrch->search->matchedLength;
1164 * @param strsrch string search data
1172 UBool checkNextExactContractionMatch(UStringSearch *strsrch,
1176 UCollationElements *coleiter = strsrch->textIter;
1177 int32_t textlength = strsrch->search->textLength;
1179 const UCollator *collator = strsrch->collator;
1180 const UChar *text = strsrch->search->text;
1215 int32_t *patternce = strsrch->pattern.CE;
1216 int32_t patterncelength = strsrch->pattern.CELength;
1219 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1229 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1251 * @param strsrch string search data
1259 inline UBool checkNextExactMatch(UStringSearch *strsrch,
1262 UCollationElements *coleiter = strsrch->textIter;
1265 if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
1270 if (!isBreakUnit(strsrch, start, *textoffset) ||
1271 checkRepeatedMatch(strsrch, start, *textoffset) ||
1272 hasAccentsBeforeMatch(strsrch, start, *textoffset) ||
1273 !checkIdentical(strsrch, start, *textoffset) ||
1274 hasAccentsAfterMatch(strsrch, start, *textoffset)) {
1277 *textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset);
1282 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
1283 checkBreakBoundary(strsrch, &start, textoffset);
1287 strsrch->search->matchedIndex = start;
1288 strsrch->search->matchedLength = *textoffset - start;
1410 * @param strsrch string search data
1415 inline UBool checkCollationMatch(const UStringSearch *strsrch,
1418 int patternceindex = strsrch->pattern.CELength;
1419 int32_t *patternce = strsrch->pattern.CE;
1422 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
1447 * @param strsrch string search match
1455 int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
1460 const UChar *text = strsrch->search->text;
1461 int32_t textlength = strsrch->search->textLength;
1485 UCollationElements *coleiter = strsrch->utilIter;
1487 UChar *rearrange = strsrch->canonicalPrefixAccents;
1505 strsrch->canonicalPrefixAccents,
1506 strsrch->search->text + offset,
1508 strsrch->canonicalSuffixAccents,
1515 if (checkCollationMatch(strsrch, coleiter)) {
1555 * @param strsrch string search data
1561 inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext,
1564 if (safetext != safebuffer && safetext != strsrch->canonicalSuffixAccents)
1579 * @param strsrch string search data
1586 int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
1590 const UChar *text = strsrch->search->text;
1591 const UCollator *collator = strsrch->collator;
1596 UCollationElements *coleiter = strsrch->utilIter;
1599 if (textoffset != 0 && ucol_unsafeCP(strsrch->canonicalSuffixAccents[0],
1606 strsrch->canonicalSuffixAccents,
1610 safetextlength = u_strlen(strsrch->canonicalSuffixAccents);
1611 safetext = strsrch->canonicalSuffixAccents;
1618 int32_t *ce = strsrch->pattern.CE;
1619 int32_t celength = strsrch->pattern.CELength;
1627 cleanUpSafeText(strsrch, safetext, safebuffer);
1633 if (coleiter == strsrch->textIter) {
1634 cleanUpSafeText(strsrch, safetext, safebuffer);
1637 cleanUpSafeText(strsrch, safetext, safebuffer);
1639 coleiter = strsrch->textIter;
1645 textce = getCE(strsrch, textce);
1651 cleanUpSafeText(strsrch, safetext, safebuffer);
1657 cleanUpSafeText(strsrch, safetext, safebuffer);
1661 int32_t result = doNextCanonicalPrefixMatch(strsrch,
1665 setColEIterOffset(strsrch->textIter, result);
1682 cleanUpSafeText(strsrch, safetext, safebuffer);
1689 setColEIterOffset(strsrch->textIter, result);
1690 strsrch->textIter->iteratordata_.toReturn =
1691 setExpansionPrefix(strsrch->textIter, leftoverces);
1712 * @param strsrch string search data
1719 UBool doNextCanonicalMatch(UStringSearch *strsrch,
1723 const UChar *text = strsrch->search->text;
1727 UCollationElements *coleiter = strsrch->textIter;
1729 if (strsrch->pattern.hasPrefixAccents) {
1730 offset = doNextCanonicalPrefixMatch(strsrch, offset, textoffset,
1740 if (!strsrch->pattern.hasSuffixAccents) {
1758 UChar *rearrange = strsrch->canonicalSuffixAccents;
1774 int32_t offset = doNextCanonicalSuffixMatch(strsrch, baseoffset,
1787 * @param strsrch string search data
1793 inline int32_t getPreviousUStringSearchBaseOffset(UStringSearch *strsrch,
1796 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1797 const UChar *text = strsrch->search->text;
1799 if (getFCD(text, &offset, strsrch->search->textLength) >>
1814 * @param strsrch string search data
1821 UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
1826 UCollationElements *coleiter = strsrch->textIter;
1827 int32_t textlength = strsrch->search->textLength;
1829 const UCollator *collator = strsrch->collator;
1830 const UChar *text = strsrch->search->text;
1858 int32_t *patternce = strsrch->pattern.CE;
1859 int32_t patterncelength = strsrch->pattern.CELength;
1861 int32_t textlength = strsrch->search->textLength;
1863 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1880 ce = getCE(strsrch, ucol_next(coleiter, status));
1884 ce = getCE(strsrch, ucol_next(coleiter, status));
1890 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1911 * @param strsrch string search data
1919 inline UBool checkNextCanonicalMatch(UStringSearch *strsrch,
1924 UCollationElements *coleiter = strsrch->textIter;
1926 if ((strsrch->pattern.hasSuffixAccents &&
1927 strsrch->canonicalSuffixAccents[0]) ||
1928 (strsrch->pattern.hasPrefixAccents &&
1929 strsrch->canonicalPrefixAccents[0])) {
1930 strsrch->search->matchedIndex = getPreviousUStringSearchBaseOffset(
1931 strsrch,
1933 strsrch->search->matchedLength = *textoffset -
1934 strsrch->search->matchedIndex;
1939 if (!checkNextCanonicalContractionMatch(strsrch, &start, textoffset,
1944 start = getPreviousUStringSearchBaseOffset(strsrch, start);
1946 if (checkRepeatedMatch(strsrch, start, *textoffset) ||
1947 !isBreakUnit(strsrch, start, *textoffset) ||
1948 !checkIdentical(strsrch, start, *textoffset)) {
1950 *textoffset = getNextBaseOffset(strsrch->search->text, *textoffset,
1951 strsrch->search->textLength);
1955 strsrch->search->matchedIndex = start;
1956 strsrch->search->matchedLength = *textoffset - start;
1966 * @param strsrch string search data
1974 inline int32_t reverseShift(UStringSearch *strsrch,
1979 if (strsrch->search->isOverlap) {
1980 if (textoffset != strsrch->search->textLength) {
1984 textoffset -= strsrch->pattern.defaultShiftSize;
1989 int32_t shift = strsrch->pattern.backShift[hash(ce)];
2000 textoffset -= strsrch->pattern.defaultShiftSize;
2003 textoffset = getPreviousUStringSearchBaseOffset(strsrch, textoffset);
2012 * @param strsrch string search data
2019 UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
2023 UCollationElements *coleiter = strsrch->textIter;
2024 int32_t textlength = strsrch->search->textLength;
2026 const UCollator *collator = strsrch->collator;
2027 const UChar *text = strsrch->search->text;
2056 int32_t *patternce = strsrch->pattern.CE;
2057 int32_t patterncelength = strsrch->pattern.CELength;
2060 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2094 * @param strsrch string search data
2105 inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
2110 int32_t end = ucol_getOffset(strsrch->textIter);
2111 if (!checkPreviousExactContractionMatch(strsrch, textoffset, &end, status)
2118 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2119 !isBreakUnit(strsrch, *textoffset, end) ||
2120 hasAccentsBeforeMatch(strsrch, *textoffset, end) ||
2121 !checkIdentical(strsrch, *textoffset, end) ||
2122 hasAccentsAfterMatch(strsrch, *textoffset, end)) {
2124 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2130 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
2131 checkBreakBoundary(strsrch, textoffset, &end);
2134 strsrch->search->matchedIndex = *textoffset;
2135 strsrch->search->matchedLength = end - *textoffset;
2151 * @param strsrch string search match
2159 int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
2164 const UChar *text = strsrch->search->text;
2168 if (!(getFCD(text, &tempend, strsrch->search->textLength) &
2173 end = getNextBaseOffset(text, end, strsrch->search->textLength);
2187 UCollationElements *coleiter = strsrch->utilIter;
2189 UChar *rearrange = strsrch->canonicalSuffixAccents;
2207 strsrch->canonicalPrefixAccents,
2208 strsrch->search->text + start,
2210 strsrch->canonicalSuffixAccents,
2217 if (checkCollationMatch(strsrch, coleiter)) {
2239 * @param strsrch string search data
2246 int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
2250 const UChar *text = strsrch->search->text;
2251 const UCollator *collator = strsrch->collator;
2259 ucol_unsafeCP(strsrch->canonicalPrefixAccents[
2260 u_strlen(strsrch->canonicalPrefixAccents) - 1
2263 strsrch->search->textLength);
2267 strsrch->canonicalPrefixAccents,
2272 safetextlength = u_strlen(strsrch->canonicalPrefixAccents);
2273 safetext = strsrch->canonicalPrefixAccents;
2276 UCollationElements *coleiter = strsrch->utilIter;
2281 int32_t *ce = strsrch->pattern.CE;
2282 int32_t celength = strsrch->pattern.CELength;
2285 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
2291 cleanUpSafeText(strsrch, safetext, safebuffer);
2297 if (coleiter == strsrch->textIter) {
2298 cleanUpSafeText(strsrch, safetext, safebuffer);
2301 cleanUpSafeText(strsrch, safetext, safebuffer);
2303 coleiter = strsrch->textIter;
2309 textce = getCE(strsrch, textce);
2315 cleanUpSafeText(strsrch, safetext, safebuffer);
2321 cleanUpSafeText(strsrch, safetext, safebuffer);
2325 int32_t result = doPreviousCanonicalSuffixMatch(strsrch,
2329 setColEIterOffset(strsrch->textIter, result);
2346 cleanUpSafeText(strsrch, safetext, safebuffer);
2353 setColEIterOffset(strsrch->textIter, result);
2354 setExpansionSuffix(strsrch->textIter, leftoverces);
2375 * @param strsrch string search data
2382 UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
2386 const UChar *text = strsrch->search->text;
2388 int32_t textlength = strsrch->search->textLength;
2390 UCollationElements *coleiter = strsrch->textIter;
2392 if (strsrch->pattern.hasSuffixAccents) {
2393 offset = doPreviousCanonicalSuffixMatch(strsrch, textoffset,
2403 if (!strsrch->pattern.hasPrefixAccents) {
2421 UChar *rearrange = strsrch->canonicalPrefixAccents;
2437 int32_t offset = doPreviousCanonicalPrefixMatch(strsrch,
2452 * @param strsrch string search data
2459 UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
2463 UCollationElements *coleiter = strsrch->textIter;
2464 int32_t textlength = strsrch->search->textLength;
2466 const UCollator *collator = strsrch->collator;
2467 const UChar *text = strsrch->search->text;
2496 int32_t *patternce = strsrch->pattern.CE;
2497 int32_t patterncelength = strsrch->pattern.CELength;
2500 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2518 ce = getCE(strsrch, ucol_previous(coleiter, status));
2522 ce = getCE(strsrch, ucol_previous(coleiter, status));
2549 * @param strsrch string search data
2557 inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch,
2562 UCollationElements *coleiter = strsrch->textIter;
2564 if ((strsrch->pattern.hasSuffixAccents &&
2565 strsrch->canonicalSuffixAccents[0]) ||
2566 (strsrch->pattern.hasPrefixAccents &&
2567 strsrch->canonicalPrefixAccents[0])) {
2568 strsrch->search->matchedIndex = *textoffset;
2569 strsrch->search->matchedLength =
2570 getNextUStringSearchBaseOffset(strsrch,
2577 if (!checkPreviousCanonicalContractionMatch(strsrch, textoffset, &end,
2583 end = getNextUStringSearchBaseOffset(strsrch, end);
2585 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2586 !isBreakUnit(strsrch, *textoffset, end) ||
2587 !checkIdentical(strsrch, *textoffset, end)) {
2589 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2594 strsrch->search->matchedIndex = *textoffset;
2595 strsrch->search->matchedLength = end - *textoffset;
2762 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
2764 if (strsrch) {
2765 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer &&
2766 strsrch->pattern.CE) {
2767 uprv_free(strsrch->pattern.CE);
2770 if (strsrch->pattern.PCE != NULL &&
2771 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
2772 uprv_free(strsrch->pattern.PCE);
2775 ucol_closeElements(strsrch->textIter);
2776 ucol_closeElements(strsrch->utilIter);
2778 if (strsrch->ownCollator && strsrch->collator) {
2779 ucol_close((UCollator *)strsrch->collator);
2783 if (strsrch->search->internalBreakIter) {
2784 ubrk_close(strsrch->search->internalBreakIter);
2788 uprv_free(strsrch->search);
2789 uprv_free(strsrch);
2795 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
2799 if (U_SUCCESS(*status) && strsrch) {
2800 if (isOutOfBounds(strsrch->search->textLength, position)) {
2804 setColEIterOffset(strsrch->textIter, position);
2806 strsrch->search->matchedIndex = USEARCH_DONE;
2807 strsrch->search->matchedLength = 0;
2808 strsrch->search->reset = FALSE;
2812 U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch)
2814 if (strsrch) {
2815 int32_t result = ucol_getOffset(strsrch->textIter);
2816 if (isOutOfBounds(strsrch->search->textLength, result)) {
2824 U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
2829 if (U_SUCCESS(*status) && strsrch) {
2833 strsrch->search->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
2836 strsrch->search->isCanonicalMatch = (value == USEARCH_ON ? TRUE :
2841 strsrch->search->elementComparisonType = (int16_t)value;
2843 strsrch->search->elementComparisonType = 0;
2857 const UStringSearch *strsrch,
2860 if (strsrch) {
2863 return (strsrch->search->isOverlap == TRUE ? USEARCH_ON :
2866 return (strsrch->search->isCanonicalMatch == TRUE ? USEARCH_ON :
2870 int16_t value = strsrch->search->elementComparisonType;
2885 const UStringSearch *strsrch)
2887 if (strsrch == NULL) {
2890 return strsrch->search->matchedIndex;
2894 U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
2902 if (strsrch == NULL || resultCapacity < 0 || (resultCapacity > 0 &&
2908 int32_t copylength = strsrch->search->matchedLength;
2909 int32_t copyindex = strsrch->search->matchedIndex;
2919 uprv_memcpy(result, strsrch->search->text + copyindex,
2923 strsrch->search->matchedLength, status);
2927 const UStringSearch *strsrch)
2929 if (strsrch) {
2930 return strsrch->search->matchedLength;
2937 U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
2941 if (U_SUCCESS(*status) && strsrch) {
2942 strsrch->search->breakIter = breakiter;
2944 ubrk_setText(breakiter, strsrch->search->text,
2945 strsrch->search->textLength, status);
2951 usearch_getBreakIterator(const UStringSearch *strsrch)
2953 if (strsrch) {
2954 return strsrch->search->breakIter;
2961 U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
2967 if (strsrch == NULL || text == NULL || textlength < -1 ||
2975 strsrch->search->text = text;
2976 strsrch->search->textLength = textlength;
2977 ucol_setText(strsrch->textIter, text, textlength, status);
2978 strsrch->search->matchedIndex = USEARCH_DONE;
2979 strsrch->search->matchedLength = 0;
2980 strsrch->search->reset = TRUE;
2982 if (strsrch->search->breakIter != NULL) {
2983 ubrk_setText(strsrch->search->breakIter, text,
2986 ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
2992 U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
2995 if (strsrch) {
2996 *length = strsrch->search->textLength;
2997 return strsrch->search->text;
3002 U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
3012 if (strsrch) {
3013 if (strsrch->ownCollator && (strsrch->collator != collator)) {
3014 ucol_close((UCollator *)strsrch->collator);
3015 strsrch->ownCollator = FALSE;
3017 strsrch->collator = collator;
3018 strsrch->strength = ucol_getStrength(collator);
3019 strsrch->ceMask = getMask(strsrch->strength);
3021 ubrk_close(strsrch->search->internalBreakIter);
3022 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
3023 strsrch->search->text, strsrch->search->textLength, status);
3026 strsrch->toShift =
3030 strsrch->variableTop = ucol_getVariableTop(collator, status);
3032 initialize(strsrch, status);
3035 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
3036 uprv_init_collIterate(collator, strsrch->search->text,
3037 strsrch->search->textLength,
3038 &(strsrch->textIter->iteratordata_),
3040 strsrch->utilIter->iteratordata_.coll = collator;
3049 uprv_init_pce(strsrch->textIter);
3050 uprv_init_pce(strsrch->utilIter);
3055 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch)
3057 if (strsrch) {
3058 return (UCollator *)strsrch->collator;
3063 U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
3069 if (strsrch == NULL || pattern == NULL) {
3080 strsrch->pattern.text = pattern;
3081 strsrch->pattern.textLength = patternlength;
3082 initialize(strsrch, status);
3088 usearch_getPattern(const UStringSearch *strsrch,
3091 if (strsrch) {
3092 *length = strsrch->pattern.textLength;
3093 return strsrch->pattern.text;
3100 U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
3103 if (strsrch && U_SUCCESS(*status)) {
3104 strsrch->search->isForwardSearching = TRUE;
3105 usearch_setOffset(strsrch, 0, status);
3107 return usearch_next(strsrch, status);
3113 U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
3117 if (strsrch && U_SUCCESS(*status)) {
3118 strsrch->search->isForwardSearching = TRUE;
3120 usearch_setOffset(strsrch, position, status);
3122 return usearch_next(strsrch, status);
3128 U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
3131 if (strsrch && U_SUCCESS(*status)) {
3132 strsrch->search->isForwardSearching = FALSE;
3133 usearch_setOffset(strsrch, strsrch->search->textLength, status);
3135 return usearch_previous(strsrch, status);
3141 U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
3145 if (strsrch && U_SUCCESS(*status)) {
3146 strsrch->search->isForwardSearching = FALSE;
3148 usearch_setOffset(strsrch, position, status);
3150 return usearch_previous(strsrch, status);
3178 U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
3181 if (U_SUCCESS(*status) && strsrch) {
3184 int32_t offset = usearch_getOffset(strsrch);
3185 USearch *search = strsrch->search;
3192 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3196 setMatchNotFound(strsrch);
3205 setMatchNotFound(strsrch);
3225 if (strsrch->pattern.CELength == 0) {
3234 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3244 ucol_setOffset(strsrch->textIter, offset + 1, status);
3247 ucol_setOffset(strsrch->textIter,
3261 usearch_handleNextCanonical(strsrch, status);
3264 usearch_handleNextExact(strsrch, status);
3274 ucol_setOffset(strsrch->textIter, search->textLength, status);
3276 ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
3286 U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
3289 if (U_SUCCESS(*status) && strsrch) {
3291 USearch *search = strsrch->search;
3296 setColEIterOffset(strsrch->textIter, offset);
3299 offset = usearch_getOffset(strsrch);
3318 (offset < strsrch->pattern.defaultShiftSize ||
3320 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3322 setMatchNotFound(strsrch);
3329 setMatchNotFound(strsrch);
3336 if (strsrch->pattern.CELength == 0) {
3340 setMatchNotFound(strsrch);
3345 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3351 if (strsrch->search->isCanonicalMatch) {
3353 usearch_handlePreviousCanonical(strsrch, status);
3357 usearch_handlePreviousExact(strsrch, status);
3374 U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
3381 if (strsrch) {
3389 UCollationStrength newStrength = ucol_getStrength(strsrch->collator);
3390 if ((strsrch->strength < UCOL_QUATERNARY && newStrength >= UCOL_QUATERNARY) ||
3391 (strsrch->strength >= UCOL_QUATERNARY && newStrength < UCOL_QUATERNARY)) {
3395 strsrch->strength = ucol_getStrength(strsrch->collator);
3396 ceMask = getMask(strsrch->strength);
3397 if (strsrch->ceMask != ceMask) {
3398 strsrch->ceMask = ceMask;
3403 shift = ucol_getAttribute(strsrch->collator, UCOL_ALTERNATE_HANDLING,
3405 if (strsrch->toShift != shift) {
3406 strsrch->toShift = shift;
3411 varTop = ucol_getVariableTop(strsrch->collator, &status);
3412 if (strsrch->variableTop != varTop) {
3413 strsrch->variableTop = varTop;
3417 initialize(strsrch, &status);
3420 ucol_freeOffsetBuffer(&(strsrch->textIter->iteratordata_));
3421 uprv_init_collIterate(strsrch->collator, strsrch->search->text,
3422 strsrch->search->textLength,
3423 &(strsrch->textIter->iteratordata_),
3425 strsrch->search->matchedLength = 0;
3426 strsrch->search->matchedIndex = USEARCH_DONE;
3427 strsrch->search->isOverlap = FALSE;
3428 strsrch->search->isCanonicalMatch = FALSE;
3429 strsrch->search->elementComparisonType = 0;
3430 strsrch->search->isForwardSearching = TRUE;
3431 strsrch->search->reset = TRUE;
3612 static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
3614 const UChar *text = strsrch->search->text;
3615 int32_t textLen = strsrch->search->textLength;
3651 UBreakIterator *breakiterator = strsrch->search->breakIter;
3654 breakiterator = strsrch->search->internalBreakIter;
3674 static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
3676 const UChar *text = strsrch->search->text;
3677 int32_t textLen = strsrch->search->textLength;
3702 UBreakIterator *breakiterator = strsrch->search->breakIter;
3705 breakiterator = strsrch->search->internalBreakIter;
3716 static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
3719 UBreakIterator *breakiterator = strsrch->search->breakIter;
3803 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
3818 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
3819 printf(" %8x", strsrch->pattern.CE[ii]);
3828 if(strsrch->pattern.CELength == 0 ||
3830 startIdx > strsrch->search->textLength ||
3831 strsrch->pattern.CE == NULL) {
3836 if (strsrch->pattern.PCE == NULL) {
3837 initializePatternPCETable(strsrch, status);
3840 ucol_setOffset(strsrch->textIter, startIdx, status);
3841 CEBuffer ceb(strsrch, status);
3887 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
3888 patCE = strsrch->pattern.PCE[patIx];
3893 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
3908 targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far
3940 if (strsrch->search->elementComparisonType == 0) {
3958 UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
3982 if (!isBreakBoundary(strsrch, mStart)) {
4006 if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) {
4009 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4028 if (!isBreakBoundary(strsrch, mLimit)) {
4032 if (! checkIdentical(strsrch, mStart, mLimit)) {
4070 U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
4085 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
4086 printf(" %8x", strsrch->pattern.CE[ii]);
4095 if(strsrch->pattern.CELength == 0 ||
4097 startIdx > strsrch->search->textLength ||
4098 strsrch->pattern.CE == NULL) {
4103 if (strsrch->pattern.PCE == NULL) {
4104 initializePatternPCETable(strsrch, status);
4107 CEBuffer ceb(strsrch, status);
4119 if (startIdx < strsrch->search->textLength) {
4120 UBreakIterator *bi = strsrch->search->internalBreakIter;
4123 ucol_setOffset(strsrch->textIter, next, status);
4131 ucol_setOffset(strsrch->textIter, startIdx, status);
4168 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
4169 int64_t patCE = strsrch->pattern.PCE[patIx];
4171 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset);
4175 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
4207 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
4216 if (!isBreakBoundary(strsrch, mStart)) {
4249 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4263 if (!isBreakBoundary(strsrch, mLimit)) {
4272 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4283 if (! checkIdentical(strsrch, mStart, mLimit)) {
4323 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
4326 setMatchNotFound(strsrch);
4331 UCollationElements *coleiter = strsrch->textIter;
4332 int32_t textlength = strsrch->search->textLength;
4333 int32_t *patternce = strsrch->pattern.CE;
4334 int32_t patterncelength = strsrch->pattern.CELength;
4340 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4360 targetce = getCE(strsrch, targetce);
4390 targetce = getCE(strsrch, targetce);
4406 textoffset = shiftForward(strsrch, textoffset, lastce,
4413 if (checkNextExactMatch(strsrch, &textoffset, status)) {
4415 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4419 setMatchNotFound(strsrch);
4422 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4426 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4427 strsrch->search->matchedIndex = start;
4428 strsrch->search->matchedLength = end - start;
4431 setMatchNotFound(strsrch);
4437 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
4440 setMatchNotFound(strsrch);
4445 UCollationElements *coleiter = strsrch->textIter;
4446 int32_t textlength = strsrch->search->textLength;
4447 int32_t *patternce = strsrch->pattern.CE;
4448 int32_t patterncelength = strsrch->pattern.CELength;
4451 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4453 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4455 strsrch->canonicalPrefixAccents[0] = 0;
4456 strsrch->canonicalSuffixAccents[0] = 0;
4476 targetce = getCE(strsrch, targetce);
4498 targetce = getCE(strsrch, targetce);
4510 strsrch->canonicalPrefixAccents[0] = 0;
4511 strsrch->canonicalSuffixAccents[0] = 0;
4515 found = doNextCanonicalMatch(strsrch, textoffset, status);
4522 textoffset = shiftForward(strsrch, textoffset, lastce,
4529 if (checkNextCanonicalMatch(strsrch, &textoffset, status)) {
4530 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4534 setMatchNotFound(strsrch);
4537 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4541 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4542 strsrch->search->matchedIndex = start;
4543 strsrch->search->matchedLength = end - start;
4546 setMatchNotFound(strsrch);
4552 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
4555 setMatchNotFound(strsrch);
4560 UCollationElements *coleiter = strsrch->textIter;
4561 int32_t *patternce = strsrch->pattern.CE;
4562 int32_t patterncelength = strsrch->pattern.CELength;
4568 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4569 textoffset = strsrch->search->matchedIndex;
4572 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4595 targetce = getCE(strsrch, targetce);
4599 if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
4623 targetce = getCE(strsrch, targetce);
4640 textoffset = reverseShift(strsrch, textoffset, targetce,
4646 if (checkPreviousExactMatch(strsrch, &textoffset, status)) {
4651 setMatchNotFound(strsrch);
4656 if (strsrch->search->isOverlap) {
4657 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4658 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4661 initializePatternPCETable(strsrch, status);
4662 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
4663 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
4670 setMatchNotFound(strsrch);
4673 textOffset = ucol_getOffset(strsrch->textIter);
4676 textOffset = ucol_getOffset(strsrch->textIter);
4682 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4683 strsrch->search->matchedIndex = start;
4684 strsrch->search->matchedLength = end - start;
4687 setMatchNotFound(strsrch);
4693 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
4697 setMatchNotFound(strsrch);
4702 UCollationElements *coleiter = strsrch->textIter;
4703 int32_t *patternce = strsrch->pattern.CE;
4704 int32_t patterncelength = strsrch->pattern.CELength;
4707 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4712 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4713 textoffset = strsrch->search->matchedIndex;
4716 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4718 strsrch->canonicalPrefixAccents[0] = 0;
4719 strsrch->canonicalSuffixAccents[0] = 0;
4739 targetce = getCE(strsrch, targetce);
4765 targetce = getCE(strsrch, targetce);
4777 strsrch->canonicalPrefixAccents[0] = 0;
4778 strsrch->canonicalSuffixAccents[0] = 0;
4782 found = doPreviousCanonicalMatch(strsrch, textoffset, status);
4789 textoffset = reverseShift(strsrch, textoffset, targetce,
4795 if (checkPreviousCanonicalMatch(strsrch, &textoffset, status)) {
4800 setMatchNotFound(strsrch);
4805 if (strsrch->search->isOverlap) {
4806 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4807 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4810 initializePatternPCETable(strsrch, status);
4811 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
4812 int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
4819 setMatchNotFound(strsrch);
4822 textOffset = ucol_getOffset(strsrch->textIter);
4825 textOffset = ucol_getOffset(strsrch->textIter);
4831 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4832 strsrch->search->matchedIndex = start;
4833 strsrch->search->matchedLength = end - start;
4836 setMatchNotFound(strsrch);