Home | History | Annotate | Download | only in i18n

Lines Matching refs:strsrch

144 * @param strsrch string search data
149 inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
154 sourcece &= strsrch->ceMask;
156 if (strsrch->toShift) {
162 if (strsrch->variableTop > sourcece) {
163 if (strsrch->strength >= UCOL_QUATERNARY) {
170 } else if (strsrch->strength >= UCOL_QUATERNARY && sourcece == UCOL_IGNORABLE) {
286 * @param strsrch string search data
292 inline uint16_t initializePatternCETable(UStringSearch *strsrch,
295 UPattern *pattern = &(strsrch->pattern);
299 UCollationElements *coleiter = strsrch->utilIter;
302 coleiter = ucol_openElements(strsrch->collator, pattern->text,
307 strsrch->utilIter = coleiter;
326 uint32_t newce = getCE(strsrch, ce);
358 * @param strsrch string search data
364 inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
367 UPattern *pattern = &(strsrch->pattern);
371 UCollationElements *coleiter = strsrch->utilIter;
374 coleiter = ucol_openElements(strsrch->collator, pattern->text,
379 strsrch->utilIter = coleiter;
431 * @param strsrch UStringSearch data storage
437 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
440 UPattern *pattern = &(strsrch->pattern);
446 if (strsrch->strength == UCOL_PRIMARY) {
459 if (strsrch->pattern.pces != NULL) {
460 if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
461 uprv_free(strsrch->pattern.pces);
464 strsrch->pattern.pces = NULL;
468 return initializePatternCETable(strsrch, status);
520 * Building of the pattern collation element list and the boyer moore strsrch
543 * @param strsrch UStringSearch data storage
548 inline void initialize(UStringSearch *strsrch, UErrorCode *status)
550 int16_t expandlength = initializePattern(strsrch, status);
551 if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
552 UPattern *pattern = &strsrch->pattern;
562 strsrch->pattern.defaultShiftSize = 0;
569 * @param strsrch string search data
574 void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/,
578 UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
600 * @param strsrch string search data
605 UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
609 UBreakIterator *breakiterator = strsrch->search->breakIter;
629 UCollationElements *coleiter = strsrch->utilIter;
630 const UChar *text = strsrch->search->text +
634 for (int32_t count = 0; count < strsrch->pattern.cesLength;
636 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
641 if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) {
647 && getCE(strsrch, nextce) == UCOL_IGNORABLE) {
696 * @param strsrch string search data
703 inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
706 int32_t textlength = strsrch->search->textLength;
707 if (strsrch->pattern.hasSuffixAccents &&
710 const UChar *text = strsrch->search->text;
724 * @param text strsrch string search data
732 inline int32_t shiftForward(UStringSearch *strsrch,
737 UPattern *pattern = &(strsrch->pattern);
752 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
765 * @param strsrch string search data
768 inline void setMatchNotFound(UStringSearch *strsrch)
771 strsrch->search->matchedIndex = USEARCH_DONE;
772 strsrch->search->matchedLength = 0;
773 if (strsrch->search->isForwardSearching) {
774 setColEIterOffset(strsrch->textIter, strsrch->search->textLength);
777 setColEIterOffset(strsrch->textIter, 0);
822 * @param strsrch string search data
831 UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
836 if (strsrch->pattern.hasPrefixAccents) {
839 const UChar *text = strsrch->search->text + start;
844 int32_t safeoffset = getNextSafeOffset(strsrch->collator,
874 UCollationElements *coleiter = strsrch->utilIter;
876 uint32_t firstce = strsrch->pattern.ces[0];
914 * @param strsrch string search data
921 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
924 if (strsrch->pattern.hasPrefixAccents) {
925 UCollationElements *coleiter = strsrch->textIter;
929 int32_t firstce = strsrch->pattern.ces[0];
932 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
940 ce = getCE(strsrch, ucol_next(coleiter, &status));
953 // accent = (getFCD(strsrch->search->text, &temp,
954 // strsrch->search->textLength)
959 UBool accent = getFCD(strsrch->search->text, &temp,
960 strsrch->search->textLength) > 0xFF;
962 return checkExtraMatchAccents(strsrch, start, end, &status);
969 U16_BACK_1(strsrch->search->text, 0, temp);
970 if (getFCD(strsrch->search->text, &temp,
971 strsrch->search->textLength) & LAST_BYTE_MASK_) {
995 * @param strsrch string search data
1002 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
1005 if (strsrch->pattern.hasSuffixAccents) {
1006 const UChar *text = strsrch->search->text;
1008 int32_t textlength = strsrch->search->textLength;
1011 int32_t firstce = strsrch->pattern.ces[0];
1012 UCollationElements *coleiter = strsrch->textIter;
1016 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
1022 while (count < strsrch->pattern.cesLength) {
1023 if (getCE(strsrch, ucol_next(coleiter, &status))
1039 ce = getCE(strsrch, ce);
1069 * @param strsrch string search data
1075 inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
1078 if (strsrch->strength != UCOL_IDENTICAL) {
1086 strsrch->nfd->normalize(
1087 UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status);
1088 strsrch->nfd->normalize(
1089 UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
1097 * @param strsrch string search data
1103 inline UBool checkRepeatedMatch(UStringSearch *strsrch,
1107 int32_t lastmatchindex = strsrch->search->matchedIndex;
1112 if (strsrch->search->isForwardSearching) {
1118 if (!result && !strsrch->search->isOverlap) {
1119 if (strsrch->search->isForwardSearching) {
1120 result = start < lastmatchindex + strsrch->search->matchedLength;
1155 * @param strsrch string search data
1163 UBool checkNextExactContractionMatch(UStringSearch *strsrch,
1167 UCollationElements *coleiter = strsrch->textIter;
1168 int32_t textlength = strsrch->search->textLength;
1170 const UCollator *collator = strsrch->collator;
1171 const UChar *text = strsrch->search->text;
1206 int32_t *patternce = strsrch->pattern.ces;
1207 int32_t patterncelength = strsrch->pattern.cesLength;
1210 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1220 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1242 * @param strsrch string search data
1250 inline UBool checkNextExactMatch(UStringSearch *strsrch,
1253 UCollationElements *coleiter = strsrch->textIter;
1256 if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
1261 if (!isBreakUnit(strsrch, start, *textoffset) ||
1262 checkRepeatedMatch(strsrch, start, *textoffset) ||
1263 hasAccentsBeforeMatch(strsrch, start, *textoffset) ||
1264 !checkIdentical(strsrch, start, *textoffset) ||
1265 hasAccentsAfterMatch(strsrch, start, *textoffset)) {
1268 *textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset);
1273 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
1274 checkBreakBoundary(strsrch, &start, textoffset);
1278 strsrch->search->matchedIndex = start;
1279 strsrch->search->matchedLength = *textoffset - start;
1401 * @param strsrch string search data
1406 inline UBool checkCollationMatch(const UStringSearch *strsrch,
1409 int patternceindex = strsrch->pattern.cesLength;
1410 int32_t *patternce = strsrch->pattern.ces;
1413 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
1438 * @param strsrch string search match
1446 int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
1451 const UChar *text = strsrch->search->text;
1452 int32_t textlength = strsrch->search->textLength;
1476 UCollationElements *coleiter = strsrch->utilIter;
1478 UChar *rearrange = strsrch->canonicalPrefixAccents;
1496 strsrch->canonicalPrefixAccents,
1497 strsrch->search->text + offset,
1499 strsrch->canonicalSuffixAccents,
1506 if (checkCollationMatch(strsrch, coleiter)) {
1546 * @param strsrch string search data
1552 inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext,
1555 if (safetext != safebuffer && safetext != strsrch->canonicalSuffixAccents)
1570 * @param strsrch string search data
1577 int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
1581 const UChar *text = strsrch->search->text;
1582 const UCollator *collator = strsrch->collator;
1587 UCollationElements *coleiter = strsrch->utilIter;
1590 if (textoffset != 0 && ucol_unsafeCP(strsrch->canonicalSuffixAccents[0],
1597 strsrch->canonicalSuffixAccents,
1601 safetextlength = u_strlen(strsrch->canonicalSuffixAccents);
1602 safetext = strsrch->canonicalSuffixAccents;
1609 int32_t *ce = strsrch->pattern.ces;
1610 int32_t celength = strsrch->pattern.cesLength;
1618 cleanUpSafeText(strsrch, safetext, safebuffer);
1624 if (coleiter == strsrch->textIter) {
1625 cleanUpSafeText(strsrch, safetext, safebuffer);
1628 cleanUpSafeText(strsrch, safetext, safebuffer);
1630 coleiter = strsrch->textIter;
1636 textce = getCE(strsrch, textce);
1642 cleanUpSafeText(strsrch, safetext, safebuffer);
1648 cleanUpSafeText(strsrch, safetext, safebuffer);
1652 int32_t result = doNextCanonicalPrefixMatch(strsrch,
1656 setColEIterOffset(strsrch->textIter, result);
1673 cleanUpSafeText(strsrch, safetext, safebuffer);
1680 setColEIterOffset(strsrch->textIter, result);
1681 strsrch->textIter->iteratordata_.toReturn =
1682 setExpansionPrefix(strsrch->textIter, leftoverces);
1703 * @param strsrch string search data
1710 UBool doNextCanonicalMatch(UStringSearch *strsrch,
1714 const UChar *text = strsrch->search->text;
1718 UCollationElements *coleiter = strsrch->textIter;
1720 if (strsrch->pattern.hasPrefixAccents) {
1721 offset = doNextCanonicalPrefixMatch(strsrch, offset, textoffset,
1731 if (!strsrch->pattern.hasSuffixAccents) {
1749 UChar *rearrange = strsrch->canonicalSuffixAccents;
1765 int32_t offset = doNextCanonicalSuffixMatch(strsrch, baseoffset,
1778 * @param strsrch string search data
1784 inline int32_t getPreviousUStringSearchBaseOffset(UStringSearch *strsrch,
1787 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1788 const UChar *text = strsrch->search->text;
1790 if (getFCD(text, &offset, strsrch->search->textLength) >>
1805 * @param strsrch string search data
1812 UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
1817 UCollationElements *coleiter = strsrch->textIter;
1818 int32_t textlength = strsrch->search->textLength;
1820 const UCollator *collator = strsrch->collator;
1821 const UChar *text = strsrch->search->text;
1849 int32_t *patternce = strsrch->pattern.ces;
1850 int32_t patterncelength = strsrch->pattern.cesLength;
1852 int32_t textlength = strsrch->search->textLength;
1854 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1871 ce = getCE(strsrch, ucol_next(coleiter, status));
1875 ce = getCE(strsrch, ucol_next(coleiter, status));
1881 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1902 * @param strsrch string search data
1910 inline UBool checkNextCanonicalMatch(UStringSearch *strsrch,
1915 UCollationElements *coleiter = strsrch->textIter;
1917 if ((strsrch->pattern.hasSuffixAccents &&
1918 strsrch->canonicalSuffixAccents[0]) ||
1919 (strsrch->pattern.hasPrefixAccents &&
1920 strsrch->canonicalPrefixAccents[0])) {
1921 strsrch->search->matchedIndex = getPreviousUStringSearchBaseOffset(
1922 strsrch,
1924 strsrch->search->matchedLength = *textoffset -
1925 strsrch->search->matchedIndex;
1930 if (!checkNextCanonicalContractionMatch(strsrch, &start, textoffset,
1935 start = getPreviousUStringSearchBaseOffset(strsrch, start);
1937 if (checkRepeatedMatch(strsrch, start, *textoffset) ||
1938 !isBreakUnit(strsrch, start, *textoffset) ||
1939 !checkIdentical(strsrch, start, *textoffset)) {
1941 *textoffset = getNextBaseOffset(strsrch->search->text, *textoffset,
1942 strsrch->search->textLength);
1946 strsrch->search->matchedIndex = start;
1947 strsrch->search->matchedLength = *textoffset - start;
1957 * @param text strsrch string search data
1965 inline int32_t reverseShift(UStringSearch *strsrch,
1970 if (strsrch->search->isOverlap) {
1971 if (textoffset != strsrch->search->textLength) {
1975 textoffset -= strsrch->pattern.defaultShiftSize;
1980 int32_t shift = strsrch->pattern.backShift[hashFromCE32(ce)];
1991 textoffset -= strsrch->pattern.defaultShiftSize;
1994 textoffset = getPreviousUStringSearchBaseOffset(strsrch, textoffset);
2003 * @param strsrch string search data
2010 UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
2014 UCollationElements *coleiter = strsrch->textIter;
2015 int32_t textlength = strsrch->search->textLength;
2017 const UCollator *collator = strsrch->collator;
2018 const UChar *text = strsrch->search->text;
2047 int32_t *patternce = strsrch->pattern.ces;
2048 int32_t patterncelength = strsrch->pattern.cesLength;
2051 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2085 * @param strsrch string search data
2096 inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
2101 int32_t end = ucol_getOffset(strsrch->textIter);
2102 if (!checkPreviousExactContractionMatch(strsrch, textoffset, &end, status)
2109 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2110 !isBreakUnit(strsrch, *textoffset, end) ||
2111 hasAccentsBeforeMatch(strsrch, *textoffset, end) ||
2112 !checkIdentical(strsrch, *textoffset, end) ||
2113 hasAccentsAfterMatch(strsrch, *textoffset, end)) {
2115 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2121 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
2122 checkBreakBoundary(strsrch, textoffset, &end);
2125 strsrch->search->matchedIndex = *textoffset;
2126 strsrch->search->matchedLength = end - *textoffset;
2142 * @param strsrch string search match
2150 int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
2155 const UChar *text = strsrch->search->text;
2159 if (!(getFCD(text, &tempend, strsrch->search->textLength) &
2164 end = getNextBaseOffset(text, end, strsrch->search->textLength);
2178 UCollationElements *coleiter = strsrch->utilIter;
2180 UChar *rearrange = strsrch->canonicalSuffixAccents;
2198 strsrch->canonicalPrefixAccents,
2199 strsrch->search->text + start,
2201 strsrch->canonicalSuffixAccents,
2208 if (checkCollationMatch(strsrch, coleiter)) {
2230 * @param strsrch string search data
2237 int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
2241 const UChar *text = strsrch->search->text;
2242 const UCollator *collator = strsrch->collator;
2250 ucol_unsafeCP(strsrch->canonicalPrefixAccents[
2251 u_strlen(strsrch->canonicalPrefixAccents) - 1
2254 strsrch->search->textLength);
2258 strsrch->canonicalPrefixAccents,
2263 safetextlength = u_strlen(strsrch->canonicalPrefixAccents);
2264 safetext = strsrch->canonicalPrefixAccents;
2267 UCollationElements *coleiter = strsrch->utilIter;
2272 int32_t *ce = strsrch->pattern.ces;
2273 int32_t celength = strsrch->pattern.cesLength;
2276 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
2282 cleanUpSafeText(strsrch, safetext, safebuffer);
2288 if (coleiter == strsrch->textIter) {
2289 cleanUpSafeText(strsrch, safetext, safebuffer);
2292 cleanUpSafeText(strsrch, safetext, safebuffer);
2294 coleiter = strsrch->textIter;
2300 textce = getCE(strsrch, textce);
2306 cleanUpSafeText(strsrch, safetext, safebuffer);
2312 cleanUpSafeText(strsrch, safetext, safebuffer);
2316 int32_t result = doPreviousCanonicalSuffixMatch(strsrch,
2320 setColEIterOffset(strsrch->textIter, result);
2337 cleanUpSafeText(strsrch, safetext, safebuffer);
2344 setColEIterOffset(strsrch->textIter, result);
2345 setExpansionSuffix(strsrch->textIter, leftoverces);
2366 * @param strsrch string search data
2373 UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
2377 const UChar *text = strsrch->search->text;
2379 int32_t textlength = strsrch->search->textLength;
2381 UCollationElements *coleiter = strsrch->textIter;
2383 if (strsrch->pattern.hasSuffixAccents) {
2384 offset = doPreviousCanonicalSuffixMatch(strsrch, textoffset,
2394 if (!strsrch->pattern.hasPrefixAccents) {
2412 UChar *rearrange = strsrch->canonicalPrefixAccents;
2428 int32_t offset = doPreviousCanonicalPrefixMatch(strsrch,
2443 * @param strsrch string search data
2450 UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
2454 UCollationElements *coleiter = strsrch->textIter;
2455 int32_t textlength = strsrch->search->textLength;
2457 const UCollator *collator = strsrch->collator;
2458 const UChar *text = strsrch->search->text;
2487 int32_t *patternce = strsrch->pattern.ces;
2488 int32_t patterncelength = strsrch->pattern.cesLength;
2491 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2509 ce = getCE(strsrch, ucol_previous(coleiter, status));
2513 ce = getCE(strsrch, ucol_previous(coleiter, status));
2540 * @param strsrch string search data
2548 inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch,
2553 UCollationElements *coleiter = strsrch->textIter;
2555 if ((strsrch->pattern.hasSuffixAccents &&
2556 strsrch->canonicalSuffixAccents[0]) ||
2557 (strsrch->pattern.hasPrefixAccents &&
2558 strsrch->canonicalPrefixAccents[0])) {
2559 strsrch->search->matchedIndex = *textoffset;
2560 strsrch->search->matchedLength =
2561 getNextUStringSearchBaseOffset(strsrch,
2568 if (!checkPreviousCanonicalContractionMatch(strsrch, textoffset, &end,
2574 end = getNextUStringSearchBaseOffset(strsrch, end);
2576 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2577 !isBreakUnit(strsrch, *textoffset, end) ||
2578 !checkIdentical(strsrch, *textoffset, end)) {
2580 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2585 strsrch->search->matchedIndex = *textoffset;
2586 strsrch->search->matchedLength = end - *textoffset;
2754 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
2756 if (strsrch) {
2757 if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
2758 strsrch->pattern.ces) {
2759 uprv_free(strsrch->pattern.ces);
2762 if (strsrch->pattern.pces != NULL &&
2763 strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
2764 uprv_free(strsrch->pattern.pces);
2767 delete strsrch->textProcessedIter;
2768 ucol_closeElements(strsrch->textIter);
2769 ucol_closeElements(strsrch->utilIter);
2771 if (strsrch->ownCollator && strsrch->collator) {
2772 ucol_close((UCollator *)strsrch->collator);
2776 if (strsrch->search->internalBreakIter) {
2777 ubrk_close(strsrch->search->internalBreakIter);
2781 uprv_free(strsrch->search);
2782 uprv_free(strsrch);
2788 UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) {
2790 if (strsrch->textProcessedIter == NULL) {
2791 strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
2792 if (strsrch->textProcessedIter == NULL) {
2797 strsrch->textProcessedIter->init(strsrch->textIter);
2806 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
2810 if (U_SUCCESS(*status) && strsrch) {
2811 if (isOutOfBounds(strsrch->search->textLength, position)) {
2815 setColEIterOffset(strsrch->textIter, position);
2817 strsrch->search->matchedIndex = USEARCH_DONE;
2818 strsrch->search->matchedLength = 0;
2819 strsrch->search->reset = FALSE;
2823 U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch)
2825 if (strsrch) {
2826 int32_t result = ucol_getOffset(strsrch->textIter);
2827 if (isOutOfBounds(strsrch->search->textLength, result)) {
2835 U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
2840 if (U_SUCCESS(*status) && strsrch) {
2844 strsrch->search->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
2847 strsrch->search->isCanonicalMatch = (value == USEARCH_ON ? TRUE :
2852 strsrch->search->elementComparisonType = (int16_t)value;
2854 strsrch->search->elementComparisonType = 0;
2868 const UStringSearch *strsrch,
2871 if (strsrch) {
2874 return (strsrch->search->isOverlap == TRUE ? USEARCH_ON :
2877 return (strsrch->search->isCanonicalMatch == TRUE ? USEARCH_ON :
2881 int16_t value = strsrch->search->elementComparisonType;
2896 const UStringSearch *strsrch)
2898 if (strsrch == NULL) {
2901 return strsrch->search->matchedIndex;
2905 U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
2913 if (strsrch == NULL || resultCapacity < 0 || (resultCapacity > 0 &&
2919 int32_t copylength = strsrch->search->matchedLength;
2920 int32_t copyindex = strsrch->search->matchedIndex;
2930 uprv_memcpy(result, strsrch->search->text + copyindex,
2934 strsrch->search->matchedLength, status);
2938 const UStringSearch *strsrch)
2940 if (strsrch) {
2941 return strsrch->search->matchedLength;
2948 U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
2952 if (U_SUCCESS(*status) && strsrch) {
2953 strsrch->search->breakIter = breakiter;
2955 ubrk_setText(breakiter, strsrch->search->text,
2956 strsrch->search->textLength, status);
2962 usearch_getBreakIterator(const UStringSearch *strsrch)
2964 if (strsrch) {
2965 return strsrch->search->breakIter;
2972 U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
2978 if (strsrch == NULL || text == NULL || textlength < -1 ||
2986 strsrch->search->text = text;
2987 strsrch->search->textLength = textlength;
2988 ucol_setText(strsrch->textIter, text, textlength, status);
2989 strsrch->search->matchedIndex = USEARCH_DONE;
2990 strsrch->search->matchedLength = 0;
2991 strsrch->search->reset = TRUE;
2993 if (strsrch->search->breakIter != NULL) {
2994 ubrk_setText(strsrch->search->breakIter, text,
2997 ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
3003 U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
3006 if (strsrch) {
3007 *length = strsrch->search->textLength;
3008 return strsrch->search->text;
3013 U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
3023 if (strsrch) {
3024 delete strsrch->textProcessedIter;
3025 strsrch->textProcessedIter = NULL;
3026 ucol_closeElements(strsrch->textIter);
3027 ucol_closeElements(strsrch->utilIter);
3028 strsrch->textIter = strsrch->utilIter = NULL;
3029 if (strsrch->ownCollator && (strsrch->collator != collator)) {
3030 ucol_close((UCollator *)strsrch->collator);
3031 strsrch->ownCollator = FALSE;
3033 strsrch->collator = collator;
3034 strsrch->strength = ucol_getStrength(collator);
3035 strsrch->ceMask = getMask(strsrch->strength);
3037 ubrk_close(strsrch->search->internalBreakIter);
3038 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
3039 strsrch->search->text, strsrch->search->textLength, status);
3042 strsrch->toShift =
3046 strsrch->variableTop = ucol_getVariableTop(collator, status);
3047 strsrch->textIter = ucol_openElements(collator,
3048 strsrch->search->text,
3049 strsrch->search->textLength,
3051 strsrch->utilIter = ucol_openElements(
3052 collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
3054 initialize(strsrch, status);
3061 uprv_init_pce(strsrch->textIter);
3062 uprv_init_pce(strsrch->utilIter);
3067 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch)
3069 if (strsrch) {
3070 return (UCollator *)strsrch->collator;
3075 U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
3081 if (strsrch == NULL || pattern == NULL) {
3092 strsrch->pattern.text = pattern;
3093 strsrch->pattern.textLength = patternlength;
3094 initialize(strsrch, status);
3100 usearch_getPattern(const UStringSearch *strsrch,
3103 if (strsrch) {
3104 *length = strsrch->pattern.textLength;
3105 return strsrch->pattern.text;
3112 U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
3115 if (strsrch && U_SUCCESS(*status)) {
3116 strsrch->search->isForwardSearching = TRUE;
3117 usearch_setOffset(strsrch, 0, status);
3119 return usearch_next(strsrch, status);
3125 U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
3129 if (strsrch && U_SUCCESS(*status)) {
3130 strsrch->search->isForwardSearching = TRUE;
3132 usearch_setOffset(strsrch, position, status);
3134 return usearch_next(strsrch, status);
3140 U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
3143 if (strsrch && U_SUCCESS(*status)) {
3144 strsrch->search->isForwardSearching = FALSE;
3145 usearch_setOffset(strsrch, strsrch->search->textLength, status);
3147 return usearch_previous(strsrch, status);
3153 U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
3157 if (strsrch && U_SUCCESS(*status)) {
3158 strsrch->search->isForwardSearching = FALSE;
3160 usearch_setOffset(strsrch, position, status);
3162 return usearch_previous(strsrch, status);
3190 U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
3193 if (U_SUCCESS(*status) && strsrch) {
3196 int32_t offset = usearch_getOffset(strsrch);
3197 USearch *search = strsrch->search;
3204 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3208 setMatchNotFound(strsrch);
3217 setMatchNotFound(strsrch);
3237 if (strsrch->pattern.cesLength == 0) {
3246 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3256 ucol_setOffset(strsrch->textIter, offset + 1, status);
3259 ucol_setOffset(strsrch->textIter,
3273 usearch_handleNextCanonical(strsrch, status);
3276 usearch_handleNextExact(strsrch, status);
3286 ucol_setOffset(strsrch->textIter, search->textLength, status);
3288 ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
3298 U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
3301 if (U_SUCCESS(*status) && strsrch) {
3303 USearch *search = strsrch->search;
3308 setColEIterOffset(strsrch->textIter, offset);
3311 offset = usearch_getOffset(strsrch);
3330 (offset < strsrch->pattern.defaultShiftSize ||
3332 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3334 setMatchNotFound(strsrch);
3341 setMatchNotFound(strsrch);
3348 if (strsrch->pattern.cesLength == 0) {
3352 setMatchNotFound(strsrch);
3357 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3363 if (strsrch->search->isCanonicalMatch) {
3365 usearch_handlePreviousCanonical(strsrch, status);
3369 usearch_handlePreviousExact(strsrch, status);
3386 U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
3393 if (strsrch) {
3401 UCollationStrength newStrength = ucol_getStrength(strsrch->collator);
3402 if ((strsrch->strength < UCOL_QUATERNARY && newStrength >= UCOL_QUATERNARY) ||
3403 (strsrch->strength >= UCOL_QUATERNARY && newStrength < UCOL_QUATERNARY)) {
3407 strsrch->strength = ucol_getStrength(strsrch->collator);
3408 ceMask = getMask(strsrch->strength);
3409 if (strsrch->ceMask != ceMask) {
3410 strsrch->ceMask = ceMask;
3415 shift = ucol_getAttribute(strsrch->collator, UCOL_ALTERNATE_HANDLING,
3417 if (strsrch->toShift != shift) {
3418 strsrch->toShift = shift;
3423 varTop = ucol_getVariableTop(strsrch->collator, &status);
3424 if (strsrch->variableTop != varTop) {
3425 strsrch->variableTop = varTop;
3429 initialize(strsrch, &status);
3431 ucol_setText(strsrch->textIter, strsrch->search->text,
3432 strsrch->search->textLength,
3434 strsrch->search->matchedLength = 0;
3435 strsrch->search->matchedIndex = USEARCH_DONE;
3436 strsrch->search->isOverlap = FALSE;
3437 strsrch->search->isCanonicalMatch = FALSE;
3438 strsrch->search->elementComparisonType = 0;
3439 strsrch->search->isForwardSearching = TRUE;
3440 strsrch->search->reset = TRUE;
3623 static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
3625 const UChar *text = strsrch->search->text;
3626 int32_t textLen = strsrch->search->textLength;
3662 UBreakIterator *breakiterator = strsrch->search->breakIter;
3665 breakiterator = strsrch->search->internalBreakIter;
3685 static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
3687 const UChar *text = strsrch->search->text;
3688 int32_t textLen = strsrch->search->textLength;
3713 UBreakIterator *breakiterator = strsrch->search->breakIter;
3716 breakiterator = strsrch->search->internalBreakIter;
3727 static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
3730 UBreakIterator *breakiterator = strsrch->search->breakIter;
3836 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
3851 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
3852 printf(" %8x", strsrch->pattern.ces[ii]);
3861 if(strsrch->pattern.cesLength == 0 ||
3863 startIdx > strsrch->search->textLength ||
3864 strsrch->pattern.ces == NULL) {
3869 if (strsrch->pattern.pces == NULL) {
3870 initializePatternPCETable(strsrch, status);
3873 ucol_setOffset(strsrch->textIter, startIdx, status);
3874 CEIBuffer ceb(strsrch, status);
3920 for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
3921 patCE = strsrch->pattern.pces[patIx];
3926 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
3941 targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
3973 if (strsrch->search->elementComparisonType == 0) {
3991 UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
4015 if (!isBreakBoundary(strsrch, mStart)) {
4041 if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) {
4043 strsrch->search->breakIter == NULL &&
4046 (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) ||
4047 strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
4067 if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) {
4070 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4093 if (!isBreakBoundary(strsrch, mLimit)) {
4098 if (! checkIdentical(strsrch, mStart, mLimit)) {
4136 U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
4151 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
4152 printf(" %8x", strsrch->pattern.ces[ii]);
4161 if(strsrch->pattern.cesLength == 0 ||
4163 startIdx > strsrch->search->textLength ||
4164 strsrch->pattern.ces == NULL) {
4169 if (strsrch->pattern.pces == NULL) {
4170 initializePatternPCETable(strsrch, status);
4173 CEIBuffer ceb(strsrch, status);
4185 if (startIdx < strsrch->search->textLength) {
4186 UBreakIterator *bi = strsrch->search->internalBreakIter;
4189 ucol_setOffset(strsrch->textIter, next, status);
4197 ucol_setOffset(strsrch->textIter, startIdx, status);
4234 for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
4235 int64_t patCE = strsrch->pattern.pces[patIx];
4237 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
4241 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
4273 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
4282 if (!isBreakBoundary(strsrch, mStart)) {
4324 if (strsrch->search->text != NULL && strsrch->search->textLength > maxLimit) {
4326 strsrch->search->breakIter == NULL &&
4329 (strsrch->nfd->hasBoundaryBefore(codePointAt(*strsrch->search, maxLimit)) ||
4330 strsrch->nfd->hasBoundaryAfter(codePointBefore(*strsrch->search, maxLimit)));
4343 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4360 if (!isBreakBoundary(strsrch, mLimit)) {
4370 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4381 if (! checkIdentical(strsrch, mStart, mLimit)) {
4421 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
4424 setMatchNotFound(strsrch);
4429 UCollationElements *coleiter = strsrch->textIter;
4430 int32_t textlength = strsrch->search->textLength;
4431 int32_t *patternce = strsrch->pattern.ces;
4432 int32_t patterncelength = strsrch->pattern.cesLength;
4438 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4458 targetce = getCE(strsrch, targetce);
4488 targetce = getCE(strsrch, targetce);
4504 textoffset = shiftForward(strsrch, textoffset, lastce,
4511 if (checkNextExactMatch(strsrch, &textoffset, status)) {
4513 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4517 setMatchNotFound(strsrch);
4520 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4524 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4525 strsrch->search->matchedIndex = start;
4526 strsrch->search->matchedLength = end - start;
4529 setMatchNotFound(strsrch);
4535 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
4538 setMatchNotFound(strsrch);
4543 UCollationElements *coleiter = strsrch->textIter;
4544 int32_t textlength = strsrch->search->textLength;
4545 int32_t *patternce = strsrch->pattern.ces;
4546 int32_t patterncelength = strsrch->pattern.cesLength;
4549 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4551 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4553 strsrch->canonicalPrefixAccents[0] = 0;
4554 strsrch->canonicalSuffixAccents[0] = 0;
4574 targetce = getCE(strsrch, targetce);
4596 targetce = getCE(strsrch, targetce);
4608 strsrch->canonicalPrefixAccents[0] = 0;
4609 strsrch->canonicalSuffixAccents[0] = 0;
4613 found = doNextCanonicalMatch(strsrch, textoffset, status);
4620 textoffset = shiftForward(strsrch, textoffset, lastce,
4627 if (checkNextCanonicalMatch(strsrch, &textoffset, status)) {
4628 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4632 setMatchNotFound(strsrch);
4635 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4639 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4640 strsrch->search->matchedIndex = start;
4641 strsrch->search->matchedLength = end - start;
4644 setMatchNotFound(strsrch);
4650 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
4653 setMatchNotFound(strsrch);
4658 UCollationElements *coleiter = strsrch->textIter;
4659 int32_t *patternce = strsrch->pattern.ces;
4660 int32_t patterncelength = strsrch->pattern.cesLength;
4666 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4667 textoffset = strsrch->search->matchedIndex;
4670 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4693 targetce = getCE(strsrch, targetce);
4697 if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
4721 targetce = getCE(strsrch, targetce);
4738 textoffset = reverseShift(strsrch, textoffset, targetce,
4744 if (checkPreviousExactMatch(strsrch, &textoffset, status)) {
4749 setMatchNotFound(strsrch);
4754 if (strsrch->search->isOverlap) {
4755 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4756 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4759 initializePatternPCETable(strsrch, status);
4760 if (!initTextProcessedIter(strsrch, status)) {
4761 setMatchNotFound(strsrch);
4764 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4765 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4772 setMatchNotFound(strsrch);
4775 textOffset = ucol_getOffset(strsrch->textIter);
4778 textOffset = ucol_getOffset(strsrch->textIter);
4784 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4785 strsrch->search->matchedIndex = start;
4786 strsrch->search->matchedLength = end - start;
4789 setMatchNotFound(strsrch);
4795 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
4799 setMatchNotFound(strsrch);
4804 UCollationElements *coleiter = strsrch->textIter;
4805 int32_t *patternce = strsrch->pattern.ces;
4806 int32_t patterncelength = strsrch->pattern.cesLength;
4809 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4814 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4815 textoffset = strsrch->search->matchedIndex;
4818 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4820 strsrch->canonicalPrefixAccents[0] = 0;
4821 strsrch->canonicalSuffixAccents[0] = 0;
4841 targetce = getCE(strsrch, targetce);
4867 targetce = getCE(strsrch, targetce);
4879 strsrch->canonicalPrefixAccents[0] = 0;
4880 strsrch->canonicalSuffixAccents[0] = 0;
4884 found = doPreviousCanonicalMatch(strsrch, textoffset, status);
4891 textoffset = reverseShift(strsrch, textoffset, targetce,
4897 if (checkPreviousCanonicalMatch(strsrch, &textoffset, status)) {
4902 setMatchNotFound(strsrch);
4907 if (strsrch->search->isOverlap) {
4908 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4909 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4912 initializePatternPCETable(strsrch, status);
4913 if (!initTextProcessedIter(strsrch, status)) {
4914 setMatchNotFound(strsrch);
4917 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4918 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4925 setMatchNotFound(strsrch);
4928 textOffset = ucol_getOffset(strsrch->textIter);
4931 textOffset = ucol_getOffset(strsrch->textIter);
4937 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4938 strsrch->search->matchedIndex = start;
4939 strsrch->search->matchedLength = end - start;
4942 setMatchNotFound(strsrch);