Lines Matching refs:strsrch
142 * @param strsrch string search data
147 inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
152 sourcece &= strsrch->ceMask;
154 if (strsrch->toShift) {
160 if (strsrch->variableTop > sourcece) {
161 if (strsrch->strength >= UCOL_QUATERNARY) {
168 } else if (strsrch->strength >= UCOL_QUATERNARY && sourcece == UCOL_IGNORABLE) {
284 * @param strsrch string search data
290 inline uint16_t initializePatternCETable(UStringSearch *strsrch,
293 UPattern *pattern = &(strsrch->pattern);
297 UCollationElements *coleiter = strsrch->utilIter;
300 coleiter = ucol_openElements(strsrch->collator, pattern->text,
305 strsrch->utilIter = coleiter;
324 uint32_t newce = getCE(strsrch, ce);
356 * @param strsrch string search data
362 inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
365 UPattern *pattern = &(strsrch->pattern);
369 UCollationElements *coleiter = strsrch->utilIter;
372 coleiter = ucol_openElements(strsrch->collator, pattern->text,
377 strsrch->utilIter = coleiter;
429 * @param strsrch UStringSearch data storage
435 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
438 UPattern *pattern = &(strsrch->pattern);
444 if (strsrch->strength == UCOL_PRIMARY) {
457 if (strsrch->pattern.pces != NULL) {
458 if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
459 uprv_free(strsrch->pattern.pces);
462 strsrch->pattern.pces = NULL;
466 return initializePatternCETable(strsrch, status);
518 * Building of the pattern collation element list and the boyer moore strsrch
541 * @param strsrch UStringSearch data storage
546 inline void initialize(UStringSearch *strsrch, UErrorCode *status)
548 int16_t expandlength = initializePattern(strsrch, status);
549 if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
550 UPattern *pattern = &strsrch->pattern;
560 strsrch->pattern.defaultShiftSize = 0;
567 * @param strsrch string search data
572 void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/,
576 UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
598 * @param strsrch string search data
603 UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
607 UBreakIterator *breakiterator = strsrch->search->breakIter;
627 UCollationElements *coleiter = strsrch->utilIter;
628 const UChar *text = strsrch->search->text +
632 for (int32_t count = 0; count < strsrch->pattern.cesLength;
634 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
639 if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) {
645 && getCE(strsrch, nextce) == UCOL_IGNORABLE) {
694 * @param strsrch string search data
701 inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
704 int32_t textlength = strsrch->search->textLength;
705 if (strsrch->pattern.hasSuffixAccents &&
708 const UChar *text = strsrch->search->text;
722 * @param text strsrch string search data
730 inline int32_t shiftForward(UStringSearch *strsrch,
735 UPattern *pattern = &(strsrch->pattern);
750 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
763 * @param strsrch string search data
766 inline void setMatchNotFound(UStringSearch *strsrch)
769 strsrch->search->matchedIndex = USEARCH_DONE;
770 strsrch->search->matchedLength = 0;
771 if (strsrch->search->isForwardSearching) {
772 setColEIterOffset(strsrch->textIter, strsrch->search->textLength);
775 setColEIterOffset(strsrch->textIter, 0);
820 * @param strsrch string search data
829 UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
834 if (strsrch->pattern.hasPrefixAccents) {
837 const UChar *text = strsrch->search->text + start;
842 int32_t safeoffset = getNextSafeOffset(strsrch->collator,
872 UCollationElements *coleiter = strsrch->utilIter;
874 uint32_t firstce = strsrch->pattern.ces[0];
912 * @param strsrch string search data
919 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
922 if (strsrch->pattern.hasPrefixAccents) {
923 UCollationElements *coleiter = strsrch->textIter;
927 int32_t firstce = strsrch->pattern.ces[0];
930 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
938 ce = getCE(strsrch, ucol_next(coleiter, &status));
951 // accent = (getFCD(strsrch->search->text, &temp,
952 // strsrch->search->textLength)
957 UBool accent = getFCD(strsrch->search->text, &temp,
958 strsrch->search->textLength) > 0xFF;
960 return checkExtraMatchAccents(strsrch, start, end, &status);
967 U16_BACK_1(strsrch->search->text, 0, temp);
968 if (getFCD(strsrch->search->text, &temp,
969 strsrch->search->textLength) & LAST_BYTE_MASK_) {
993 * @param strsrch string search data
1000 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
1003 if (strsrch->pattern.hasSuffixAccents) {
1004 const UChar *text = strsrch->search->text;
1006 int32_t textlength = strsrch->search->textLength;
1009 int32_t firstce = strsrch->pattern.ces[0];
1010 UCollationElements *coleiter = strsrch->textIter;
1014 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
1020 while (count < strsrch->pattern.cesLength) {
1021 if (getCE(strsrch, ucol_next(coleiter, &status))
1037 ce = getCE(strsrch, ce);
1067 * @param strsrch string search data
1073 inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
1076 if (strsrch->strength != UCOL_IDENTICAL) {
1084 strsrch->nfd->normalize(
1085 UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status);
1086 strsrch->nfd->normalize(
1087 UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
1095 * @param strsrch string search data
1101 inline UBool checkRepeatedMatch(UStringSearch *strsrch,
1105 int32_t lastmatchindex = strsrch->search->matchedIndex;
1110 if (strsrch->search->isForwardSearching) {
1116 if (!result && !strsrch->search->isOverlap) {
1117 if (strsrch->search->isForwardSearching) {
1118 result = start < lastmatchindex + strsrch->search->matchedLength;
1153 * @param strsrch string search data
1161 UBool checkNextExactContractionMatch(UStringSearch *strsrch,
1165 UCollationElements *coleiter = strsrch->textIter;
1166 int32_t textlength = strsrch->search->textLength;
1168 const UCollator *collator = strsrch->collator;
1169 const UChar *text = strsrch->search->text;
1204 int32_t *patternce = strsrch->pattern.ces;
1205 int32_t patterncelength = strsrch->pattern.cesLength;
1208 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1218 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1240 * @param strsrch string search data
1248 inline UBool checkNextExactMatch(UStringSearch *strsrch,
1251 UCollationElements *coleiter = strsrch->textIter;
1254 if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
1259 if (!isBreakUnit(strsrch, start, *textoffset) ||
1260 checkRepeatedMatch(strsrch, start, *textoffset) ||
1261 hasAccentsBeforeMatch(strsrch, start, *textoffset) ||
1262 !checkIdentical(strsrch, start, *textoffset) ||
1263 hasAccentsAfterMatch(strsrch, start, *textoffset)) {
1266 *textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset);
1271 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
1272 checkBreakBoundary(strsrch, &start, textoffset);
1276 strsrch->search->matchedIndex = start;
1277 strsrch->search->matchedLength = *textoffset - start;
1399 * @param strsrch string search data
1404 inline UBool checkCollationMatch(const UStringSearch *strsrch,
1407 int patternceindex = strsrch->pattern.cesLength;
1408 int32_t *patternce = strsrch->pattern.ces;
1411 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
1436 * @param strsrch string search match
1444 int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
1449 const UChar *text = strsrch->search->text;
1450 int32_t textlength = strsrch->search->textLength;
1474 UCollationElements *coleiter = strsrch->utilIter;
1476 UChar *rearrange = strsrch->canonicalPrefixAccents;
1494 strsrch->canonicalPrefixAccents,
1495 strsrch->search->text + offset,
1497 strsrch->canonicalSuffixAccents,
1504 if (checkCollationMatch(strsrch, coleiter)) {
1544 * @param strsrch string search data
1550 inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext,
1553 if (safetext != safebuffer && safetext != strsrch->canonicalSuffixAccents)
1568 * @param strsrch string search data
1575 int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
1579 const UChar *text = strsrch->search->text;
1580 const UCollator *collator = strsrch->collator;
1585 UCollationElements *coleiter = strsrch->utilIter;
1588 if (textoffset != 0 && ucol_unsafeCP(strsrch->canonicalSuffixAccents[0],
1595 strsrch->canonicalSuffixAccents,
1599 safetextlength = u_strlen(strsrch->canonicalSuffixAccents);
1600 safetext = strsrch->canonicalSuffixAccents;
1607 int32_t *ce = strsrch->pattern.ces;
1608 int32_t celength = strsrch->pattern.cesLength;
1616 cleanUpSafeText(strsrch, safetext, safebuffer);
1622 if (coleiter == strsrch->textIter) {
1623 cleanUpSafeText(strsrch, safetext, safebuffer);
1626 cleanUpSafeText(strsrch, safetext, safebuffer);
1628 coleiter = strsrch->textIter;
1634 textce = getCE(strsrch, textce);
1640 cleanUpSafeText(strsrch, safetext, safebuffer);
1646 cleanUpSafeText(strsrch, safetext, safebuffer);
1650 int32_t result = doNextCanonicalPrefixMatch(strsrch,
1654 setColEIterOffset(strsrch->textIter, result);
1671 cleanUpSafeText(strsrch, safetext, safebuffer);
1678 setColEIterOffset(strsrch->textIter, result);
1679 strsrch->textIter->iteratordata_.toReturn =
1680 setExpansionPrefix(strsrch->textIter, leftoverces);
1701 * @param strsrch string search data
1708 UBool doNextCanonicalMatch(UStringSearch *strsrch,
1712 const UChar *text = strsrch->search->text;
1716 UCollationElements *coleiter = strsrch->textIter;
1718 if (strsrch->pattern.hasPrefixAccents) {
1719 offset = doNextCanonicalPrefixMatch(strsrch, offset, textoffset,
1729 if (!strsrch->pattern.hasSuffixAccents) {
1747 UChar *rearrange = strsrch->canonicalSuffixAccents;
1763 int32_t offset = doNextCanonicalSuffixMatch(strsrch, baseoffset,
1776 * @param strsrch string search data
1782 inline int32_t getPreviousUStringSearchBaseOffset(UStringSearch *strsrch,
1785 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1786 const UChar *text = strsrch->search->text;
1788 if (getFCD(text, &offset, strsrch->search->textLength) >>
1803 * @param strsrch string search data
1810 UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
1815 UCollationElements *coleiter = strsrch->textIter;
1816 int32_t textlength = strsrch->search->textLength;
1818 const UCollator *collator = strsrch->collator;
1819 const UChar *text = strsrch->search->text;
1847 int32_t *patternce = strsrch->pattern.ces;
1848 int32_t patterncelength = strsrch->pattern.cesLength;
1850 int32_t textlength = strsrch->search->textLength;
1852 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1869 ce = getCE(strsrch, ucol_next(coleiter, status));
1873 ce = getCE(strsrch, ucol_next(coleiter, status));
1879 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1900 * @param strsrch string search data
1908 inline UBool checkNextCanonicalMatch(UStringSearch *strsrch,
1913 UCollationElements *coleiter = strsrch->textIter;
1915 if ((strsrch->pattern.hasSuffixAccents &&
1916 strsrch->canonicalSuffixAccents[0]) ||
1917 (strsrch->pattern.hasPrefixAccents &&
1918 strsrch->canonicalPrefixAccents[0])) {
1919 strsrch->search->matchedIndex = getPreviousUStringSearchBaseOffset(
1920 strsrch,
1922 strsrch->search->matchedLength = *textoffset -
1923 strsrch->search->matchedIndex;
1928 if (!checkNextCanonicalContractionMatch(strsrch, &start, textoffset,
1933 start = getPreviousUStringSearchBaseOffset(strsrch, start);
1935 if (checkRepeatedMatch(strsrch, start, *textoffset) ||
1936 !isBreakUnit(strsrch, start, *textoffset) ||
1937 !checkIdentical(strsrch, start, *textoffset)) {
1939 *textoffset = getNextBaseOffset(strsrch->search->text, *textoffset,
1940 strsrch->search->textLength);
1944 strsrch->search->matchedIndex = start;
1945 strsrch->search->matchedLength = *textoffset - start;
1955 * @param text strsrch string search data
1963 inline int32_t reverseShift(UStringSearch *strsrch,
1968 if (strsrch->search->isOverlap) {
1969 if (textoffset != strsrch->search->textLength) {
1973 textoffset -= strsrch->pattern.defaultShiftSize;
1978 int32_t shift = strsrch->pattern.backShift[hashFromCE32(ce)];
1989 textoffset -= strsrch->pattern.defaultShiftSize;
1992 textoffset = getPreviousUStringSearchBaseOffset(strsrch, textoffset);
2001 * @param strsrch string search data
2008 UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
2012 UCollationElements *coleiter = strsrch->textIter;
2013 int32_t textlength = strsrch->search->textLength;
2015 const UCollator *collator = strsrch->collator;
2016 const UChar *text = strsrch->search->text;
2045 int32_t *patternce = strsrch->pattern.ces;
2046 int32_t patterncelength = strsrch->pattern.cesLength;
2049 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2083 * @param strsrch string search data
2094 inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
2099 int32_t end = ucol_getOffset(strsrch->textIter);
2100 if (!checkPreviousExactContractionMatch(strsrch, textoffset, &end, status)
2107 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2108 !isBreakUnit(strsrch, *textoffset, end) ||
2109 hasAccentsBeforeMatch(strsrch, *textoffset, end) ||
2110 !checkIdentical(strsrch, *textoffset, end) ||
2111 hasAccentsAfterMatch(strsrch, *textoffset, end)) {
2113 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2119 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
2120 checkBreakBoundary(strsrch, textoffset, &end);
2123 strsrch->search->matchedIndex = *textoffset;
2124 strsrch->search->matchedLength = end - *textoffset;
2140 * @param strsrch string search match
2148 int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
2153 const UChar *text = strsrch->search->text;
2157 if (!(getFCD(text, &tempend, strsrch->search->textLength) &
2162 end = getNextBaseOffset(text, end, strsrch->search->textLength);
2176 UCollationElements *coleiter = strsrch->utilIter;
2178 UChar *rearrange = strsrch->canonicalSuffixAccents;
2196 strsrch->canonicalPrefixAccents,
2197 strsrch->search->text + start,
2199 strsrch->canonicalSuffixAccents,
2206 if (checkCollationMatch(strsrch, coleiter)) {
2228 * @param strsrch string search data
2235 int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
2239 const UChar *text = strsrch->search->text;
2240 const UCollator *collator = strsrch->collator;
2248 ucol_unsafeCP(strsrch->canonicalPrefixAccents[
2249 u_strlen(strsrch->canonicalPrefixAccents) - 1
2252 strsrch->search->textLength);
2256 strsrch->canonicalPrefixAccents,
2261 safetextlength = u_strlen(strsrch->canonicalPrefixAccents);
2262 safetext = strsrch->canonicalPrefixAccents;
2265 UCollationElements *coleiter = strsrch->utilIter;
2270 int32_t *ce = strsrch->pattern.ces;
2271 int32_t celength = strsrch->pattern.cesLength;
2274 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
2280 cleanUpSafeText(strsrch, safetext, safebuffer);
2286 if (coleiter == strsrch->textIter) {
2287 cleanUpSafeText(strsrch, safetext, safebuffer);
2290 cleanUpSafeText(strsrch, safetext, safebuffer);
2292 coleiter = strsrch->textIter;
2298 textce = getCE(strsrch, textce);
2304 cleanUpSafeText(strsrch, safetext, safebuffer);
2310 cleanUpSafeText(strsrch, safetext, safebuffer);
2314 int32_t result = doPreviousCanonicalSuffixMatch(strsrch,
2318 setColEIterOffset(strsrch->textIter, result);
2335 cleanUpSafeText(strsrch, safetext, safebuffer);
2342 setColEIterOffset(strsrch->textIter, result);
2343 setExpansionSuffix(strsrch->textIter, leftoverces);
2364 * @param strsrch string search data
2371 UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
2375 const UChar *text = strsrch->search->text;
2377 int32_t textlength = strsrch->search->textLength;
2379 UCollationElements *coleiter = strsrch->textIter;
2381 if (strsrch->pattern.hasSuffixAccents) {
2382 offset = doPreviousCanonicalSuffixMatch(strsrch, textoffset,
2392 if (!strsrch->pattern.hasPrefixAccents) {
2410 UChar *rearrange = strsrch->canonicalPrefixAccents;
2426 int32_t offset = doPreviousCanonicalPrefixMatch(strsrch,
2441 * @param strsrch string search data
2448 UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
2452 UCollationElements *coleiter = strsrch->textIter;
2453 int32_t textlength = strsrch->search->textLength;
2455 const UCollator *collator = strsrch->collator;
2456 const UChar *text = strsrch->search->text;
2485 int32_t *patternce = strsrch->pattern.ces;
2486 int32_t patterncelength = strsrch->pattern.cesLength;
2489 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2507 ce = getCE(strsrch, ucol_previous(coleiter, status));
2511 ce = getCE(strsrch, ucol_previous(coleiter, status));
2538 * @param strsrch string search data
2546 inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch,
2551 UCollationElements *coleiter = strsrch->textIter;
2553 if ((strsrch->pattern.hasSuffixAccents &&
2554 strsrch->canonicalSuffixAccents[0]) ||
2555 (strsrch->pattern.hasPrefixAccents &&
2556 strsrch->canonicalPrefixAccents[0])) {
2557 strsrch->search->matchedIndex = *textoffset;
2558 strsrch->search->matchedLength =
2559 getNextUStringSearchBaseOffset(strsrch,
2566 if (!checkPreviousCanonicalContractionMatch(strsrch, textoffset, &end,
2572 end = getNextUStringSearchBaseOffset(strsrch, end);
2574 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2575 !isBreakUnit(strsrch, *textoffset, end) ||
2576 !checkIdentical(strsrch, *textoffset, end)) {
2578 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2583 strsrch->search->matchedIndex = *textoffset;
2584 strsrch->search->matchedLength = end - *textoffset;
2752 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
2754 if (strsrch) {
2755 if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
2756 strsrch->pattern.ces) {
2757 uprv_free(strsrch->pattern.ces);
2760 if (strsrch->pattern.pces != NULL &&
2761 strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
2762 uprv_free(strsrch->pattern.pces);
2765 delete strsrch->textProcessedIter;
2766 ucol_closeElements(strsrch->textIter);
2767 ucol_closeElements(strsrch->utilIter);
2769 if (strsrch->ownCollator && strsrch->collator) {
2770 ucol_close((UCollator *)strsrch->collator);
2774 if (strsrch->search->internalBreakIter) {
2775 ubrk_close(strsrch->search->internalBreakIter);
2779 uprv_free(strsrch->search);
2780 uprv_free(strsrch);
2786 UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) {
2788 if (strsrch->textProcessedIter == NULL) {
2789 strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
2790 if (strsrch->textProcessedIter == NULL) {
2795 strsrch->textProcessedIter->init(strsrch->textIter);
2804 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
2808 if (U_SUCCESS(*status) && strsrch) {
2809 if (isOutOfBounds(strsrch->search->textLength, position)) {
2813 setColEIterOffset(strsrch->textIter, position);
2815 strsrch->search->matchedIndex = USEARCH_DONE;
2816 strsrch->search->matchedLength = 0;
2817 strsrch->search->reset = FALSE;
2821 U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch)
2823 if (strsrch) {
2824 int32_t result = ucol_getOffset(strsrch->textIter);
2825 if (isOutOfBounds(strsrch->search->textLength, result)) {
2833 U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
2838 if (U_SUCCESS(*status) && strsrch) {
2842 strsrch->search->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
2845 strsrch->search->isCanonicalMatch = (value == USEARCH_ON ? TRUE :
2850 strsrch->search->elementComparisonType = (int16_t)value;
2852 strsrch->search->elementComparisonType = 0;
2866 const UStringSearch *strsrch,
2869 if (strsrch) {
2872 return (strsrch->search->isOverlap == TRUE ? USEARCH_ON :
2875 return (strsrch->search->isCanonicalMatch == TRUE ? USEARCH_ON :
2879 int16_t value = strsrch->search->elementComparisonType;
2894 const UStringSearch *strsrch)
2896 if (strsrch == NULL) {
2899 return strsrch->search->matchedIndex;
2903 U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
2911 if (strsrch == NULL || resultCapacity < 0 || (resultCapacity > 0 &&
2917 int32_t copylength = strsrch->search->matchedLength;
2918 int32_t copyindex = strsrch->search->matchedIndex;
2928 uprv_memcpy(result, strsrch->search->text + copyindex,
2932 strsrch->search->matchedLength, status);
2936 const UStringSearch *strsrch)
2938 if (strsrch) {
2939 return strsrch->search->matchedLength;
2946 U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
2950 if (U_SUCCESS(*status) && strsrch) {
2951 strsrch->search->breakIter = breakiter;
2953 ubrk_setText(breakiter, strsrch->search->text,
2954 strsrch->search->textLength, status);
2960 usearch_getBreakIterator(const UStringSearch *strsrch)
2962 if (strsrch) {
2963 return strsrch->search->breakIter;
2970 U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
2976 if (strsrch == NULL || text == NULL || textlength < -1 ||
2984 strsrch->search->text = text;
2985 strsrch->search->textLength = textlength;
2986 ucol_setText(strsrch->textIter, text, textlength, status);
2987 strsrch->search->matchedIndex = USEARCH_DONE;
2988 strsrch->search->matchedLength = 0;
2989 strsrch->search->reset = TRUE;
2991 if (strsrch->search->breakIter != NULL) {
2992 ubrk_setText(strsrch->search->breakIter, text,
2995 ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
3001 U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
3004 if (strsrch) {
3005 *length = strsrch->search->textLength;
3006 return strsrch->search->text;
3011 U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
3021 if (strsrch) {
3022 delete strsrch->textProcessedIter;
3023 strsrch->textProcessedIter = NULL;
3024 ucol_closeElements(strsrch->textIter);
3025 ucol_closeElements(strsrch->utilIter);
3026 strsrch->textIter = strsrch->utilIter = NULL;
3027 if (strsrch->ownCollator && (strsrch->collator != collator)) {
3028 ucol_close((UCollator *)strsrch->collator);
3029 strsrch->ownCollator = FALSE;
3031 strsrch->collator = collator;
3032 strsrch->strength = ucol_getStrength(collator);
3033 strsrch->ceMask = getMask(strsrch->strength);
3035 ubrk_close(strsrch->search->internalBreakIter);
3036 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
3037 strsrch->search->text, strsrch->search->textLength, status);
3040 strsrch->toShift =
3044 strsrch->variableTop = ucol_getVariableTop(collator, status);
3045 strsrch->textIter = ucol_openElements(collator,
3046 strsrch->search->text,
3047 strsrch->search->textLength,
3049 strsrch->utilIter = ucol_openElements(
3050 collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
3052 initialize(strsrch, status);
3059 uprv_init_pce(strsrch->textIter);
3060 uprv_init_pce(strsrch->utilIter);
3065 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch)
3067 if (strsrch) {
3068 return (UCollator *)strsrch->collator;
3073 U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
3079 if (strsrch == NULL || pattern == NULL) {
3090 strsrch->pattern.text = pattern;
3091 strsrch->pattern.textLength = patternlength;
3092 initialize(strsrch, status);
3098 usearch_getPattern(const UStringSearch *strsrch,
3101 if (strsrch) {
3102 *length = strsrch->pattern.textLength;
3103 return strsrch->pattern.text;
3110 U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
3113 if (strsrch && U_SUCCESS(*status)) {
3114 strsrch->search->isForwardSearching = TRUE;
3115 usearch_setOffset(strsrch, 0, status);
3117 return usearch_next(strsrch, status);
3123 U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
3127 if (strsrch && U_SUCCESS(*status)) {
3128 strsrch->search->isForwardSearching = TRUE;
3130 usearch_setOffset(strsrch, position, status);
3132 return usearch_next(strsrch, status);
3138 U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
3141 if (strsrch && U_SUCCESS(*status)) {
3142 strsrch->search->isForwardSearching = FALSE;
3143 usearch_setOffset(strsrch, strsrch->search->textLength, status);
3145 return usearch_previous(strsrch, status);
3151 U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
3155 if (strsrch && U_SUCCESS(*status)) {
3156 strsrch->search->isForwardSearching = FALSE;
3158 usearch_setOffset(strsrch, position, status);
3160 return usearch_previous(strsrch, status);
3188 U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
3191 if (U_SUCCESS(*status) && strsrch) {
3194 int32_t offset = usearch_getOffset(strsrch);
3195 USearch *search = strsrch->search;
3202 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3206 setMatchNotFound(strsrch);
3215 setMatchNotFound(strsrch);
3235 if (strsrch->pattern.cesLength == 0) {
3244 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3254 ucol_setOffset(strsrch->textIter, offset + 1, status);
3257 ucol_setOffset(strsrch->textIter,
3271 usearch_handleNextCanonical(strsrch, status);
3274 usearch_handleNextExact(strsrch, status);
3284 ucol_setOffset(strsrch->textIter, search->textLength, status);
3286 ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
3296 U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
3299 if (U_SUCCESS(*status) && strsrch) {
3301 USearch *search = strsrch->search;
3306 setColEIterOffset(strsrch->textIter, offset);
3309 offset = usearch_getOffset(strsrch);
3328 (offset < strsrch->pattern.defaultShiftSize ||
3330 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3332 setMatchNotFound(strsrch);
3339 setMatchNotFound(strsrch);
3346 if (strsrch->pattern.cesLength == 0) {
3350 setMatchNotFound(strsrch);
3355 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3361 if (strsrch->search->isCanonicalMatch) {
3363 usearch_handlePreviousCanonical(strsrch, status);
3367 usearch_handlePreviousExact(strsrch, status);
3384 U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
3391 if (strsrch) {
3399 UCollationStrength newStrength = ucol_getStrength(strsrch->collator);
3400 if ((strsrch
3401 (strsrch->strength >= UCOL_QUATERNARY && newStrength < UCOL_QUATERNARY)) {
3405 strsrch->strength = ucol_getStrength(strsrch->collator);
3406 ceMask = getMask(strsrch->strength);
3407 if (strsrch->ceMask != ceMask) {
3408 strsrch->ceMask = ceMask;
3413 shift = ucol_getAttribute(strsrch->collator, UCOL_ALTERNATE_HANDLING,
3415 if (strsrch->toShift != shift) {
3416 strsrch->toShift = shift;
3421 varTop = ucol_getVariableTop(strsrch->collator, &status);
3422 if (strsrch->variableTop != varTop) {
3423 strsrch->variableTop = varTop;
3427 initialize(strsrch, &status);
3429 ucol_setText(strsrch->textIter, strsrch->search->text,
3430 strsrch->search->textLength,
3432 strsrch->search->matchedLength = 0;
3433 strsrch->search->matchedIndex = USEARCH_DONE;
3434 strsrch->search->isOverlap = FALSE;
3435 strsrch->search->isCanonicalMatch = FALSE;
3436 strsrch->search->elementComparisonType = 0;
3437 strsrch->search->isForwardSearching = TRUE;
3438 strsrch->search->reset = TRUE;
3621 static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
3623 const UChar *text = strsrch->search->text;
3624 int32_t textLen = strsrch->search->textLength;
3660 UBreakIterator *breakiterator = strsrch->search->breakIter;
3663 breakiterator = strsrch->search->internalBreakIter;
3683 static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
3685 const UChar *text = strsrch->search->text;
3686 int32_t textLen = strsrch->search->textLength;
3711 UBreakIterator *breakiterator = strsrch->search->breakIter;
3714 breakiterator = strsrch->search->internalBreakIter;
3725 static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
3728 UBreakIterator *breakiterator = strsrch->search->breakIter;
3834 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
3849 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
3850 printf(" %8x", strsrch->pattern.ces[ii]);
3859 if(strsrch->pattern.cesLength == 0 ||
3861 startIdx > strsrch->search->textLength ||
3862 strsrch->pattern.ces == NULL) {
3867 if (strsrch->pattern.pces == NULL) {
3868 initializePatternPCETable(strsrch, status);
3871 ucol_setOffset(strsrch->textIter, startIdx, status);
3872 CEIBuffer ceb(strsrch, status);
3918 for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
3919 patCE = strsrch->pattern.pces[patIx];
3924 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
3939 targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
3971 if (strsrch->search->elementComparisonType == 0) {
3989 UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
4013 if (!isBreakBoundary(strsrch, mStart)) {
4039 if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) {
4041 strsrch->search->breakIter == NULL &&
4044 (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) ||
4045 strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
4065 if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) {
4068 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4091 if (!isBreakBoundary(strsrch, mLimit)) {
4096 if (! checkIdentical(strsrch, mStart, mLimit)) {
4134 U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
4149 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
4150 printf(" %8x", strsrch->pattern.ces[ii]);
4159 if(strsrch->pattern.cesLength == 0 ||
4161 startIdx > strsrch->search->textLength ||
4162 strsrch->pattern.ces == NULL) {
4167 if (strsrch->pattern.pces == NULL) {
4168 initializePatternPCETable(strsrch, status);
4171 CEIBuffer ceb(strsrch, status);
4183 if (startIdx < strsrch->search->textLength) {
4184 UBreakIterator *bi = strsrch->search->internalBreakIter;
4187 ucol_setOffset(strsrch->textIter, next, status);
4195 ucol_setOffset(strsrch->textIter, startIdx, status);
4232 for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
4233 int64_t patCE = strsrch->pattern.pces[patIx];
4235 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
4239 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
4271 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
4280 if (!isBreakBoundary(strsrch, mStart)) {
4322 if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) {
4324 strsrch->search->breakIter == NULL &&
4327 (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) ||
4328 strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
4341 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4358 if (!isBreakBoundary(strsrch, mLimit)) {
4368 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4379 if (! checkIdentical(strsrch, mStart, mLimit)) {
4419 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
4422 setMatchNotFound(strsrch);
4427 UCollationElements *coleiter = strsrch->textIter;
4428 int32_t textlength = strsrch->search->textLength;
4429 int32_t *patternce = strsrch->pattern.ces;
4430 int32_t patterncelength = strsrch->pattern.cesLength;
4436 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4456 targetce = getCE(strsrch, targetce);
4486 targetce = getCE(strsrch, targetce);
4502 textoffset = shiftForward(strsrch, textoffset, lastce,
4509 if (checkNextExactMatch(strsrch, &textoffset, status)) {
4511 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4515 setMatchNotFound(strsrch);
4518 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4522 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4523 strsrch->search->matchedIndex = start;
4524 strsrch->search->matchedLength = end - start;
4527 setMatchNotFound(strsrch);
4533 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
4536 setMatchNotFound(strsrch);
4541 UCollationElements *coleiter = strsrch->textIter;
4542 int32_t textlength = strsrch->search->textLength;
4543 int32_t *patternce = strsrch->pattern.ces;
4544 int32_t patterncelength = strsrch->pattern.cesLength;
4547 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4549 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4551 strsrch->canonicalPrefixAccents[0] = 0;
4552 strsrch->canonicalSuffixAccents[0] = 0;
4572 targetce = getCE(strsrch, targetce);
4594 targetce = getCE(strsrch, targetce);
4606 strsrch->canonicalPrefixAccents[0] = 0;
4607 strsrch->canonicalSuffixAccents[0] = 0;
4611 found = doNextCanonicalMatch(strsrch, textoffset, status);
4618 textoffset = shiftForward(strsrch, textoffset, lastce,
4625 if (checkNextCanonicalMatch(strsrch, &textoffset, status)) {
4626 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4630 setMatchNotFound(strsrch);
4633 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4637 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4638 strsrch->search->matchedIndex = start;
4639 strsrch->search->matchedLength = end - start;
4642 setMatchNotFound(strsrch);
4648 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
4651 setMatchNotFound(strsrch);
4656 UCollationElements *coleiter = strsrch->textIter;
4657 int32_t *patternce = strsrch->pattern.ces;
4658 int32_t patterncelength = strsrch->pattern.cesLength;
4664 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4665 textoffset = strsrch->search->matchedIndex;
4668 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4691 targetce = getCE(strsrch, targetce);
4695 if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
4719 targetce = getCE(strsrch, targetce);
4736 textoffset = reverseShift(strsrch, textoffset, targetce,
4742 if (checkPreviousExactMatch(strsrch, &textoffset, status)) {
4747 setMatchNotFound(strsrch);
4752 if (strsrch->search->isOverlap) {
4753 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4754 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4757 initializePatternPCETable(strsrch, status);
4758 if (!initTextProcessedIter(strsrch, status)) {
4759 setMatchNotFound(strsrch);
4762 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4763 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4770 setMatchNotFound(strsrch);
4773 textOffset = ucol_getOffset(strsrch->textIter);
4776 textOffset = ucol_getOffset(strsrch->textIter);
4782 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4783 strsrch->search->matchedIndex = start;
4784 strsrch->search->matchedLength = end - start;
4787 setMatchNotFound(strsrch);
4793 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
4797 setMatchNotFound(strsrch);
4802 UCollationElements *coleiter = strsrch->textIter;
4803 int32_t *patternce = strsrch->pattern.ces;
4804 int32_t patterncelength = strsrch->pattern.cesLength;
4807 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4812 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4813 textoffset = strsrch->search->matchedIndex;
4816 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4818 strsrch->canonicalPrefixAccents[0] = 0;
4819 strsrch->canonicalSuffixAccents[0] = 0;
4839 targetce = getCE(strsrch, targetce);
4865 targetce = getCE(strsrch, targetce);
4877 strsrch->canonicalPrefixAccents[0] = 0;
4878 strsrch->canonicalSuffixAccents[0] = 0;
4882 found = doPreviousCanonicalMatch(strsrch, textoffset, status);
4889 textoffset = reverseShift(strsrch, textoffset, targetce,
4895 if (checkPreviousCanonicalMatch(strsrch, &textoffset, status)) {
4900 setMatchNotFound(strsrch);
4905 if (strsrch->search->isOverlap) {
4906 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4907 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4910 initializePatternPCETable(strsrch, status);
4911 if (!initTextProcessedIter(strsrch, status)) {
4912 setMatchNotFound(strsrch);
4915 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4916 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4923 setMatchNotFound(strsrch);
4926 textOffset = ucol_getOffset(strsrch->textIter);
4929 textOffset = ucol_getOffset(strsrch->textIter);
4935 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4936 strsrch->search->matchedIndex = start;
4937 strsrch->search->matchedLength = end - start;
4940 setMatchNotFound(strsrch);