Home | History | Annotate | Download | only in i18n

Lines Matching refs:strsrch

140 * @param strsrch string search data
145 inline int32_t getCE(const UStringSearch *strsrch, uint32_t sourcece)
150 sourcece &= strsrch->ceMask;
152 if (strsrch->toShift) {
158 if (strsrch->variableTop > sourcece) {
159 if (strsrch->strength >= UCOL_QUATERNARY) {
166 } else if (strsrch->strength >= UCOL_QUATERNARY && sourcece == UCOL_IGNORABLE) {
282 * @param strsrch string search data
288 inline uint16_t initializePatternCETable(UStringSearch *strsrch,
291 UPattern *pattern = &(strsrch->pattern);
295 UCollationElements *coleiter = strsrch->utilIter;
298 coleiter = ucol_openElements(strsrch->collator, pattern->text,
303 strsrch->utilIter = coleiter;
322 uint32_t newce = getCE(strsrch, ce);
354 * @param strsrch string search data
360 inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
363 UPattern *pattern = &(strsrch->pattern);
367 UCollationElements *coleiter = strsrch->utilIter;
370 coleiter = ucol_openElements(strsrch->collator, pattern->text,
375 strsrch->utilIter = coleiter;
427 * @param strsrch UStringSearch data storage
433 inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
436 UPattern *pattern = &(strsrch->pattern);
442 if (strsrch->strength == UCOL_PRIMARY) {
455 if (strsrch->pattern.PCE != NULL) {
456 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
457 uprv_free(strsrch->pattern.PCE);
460 strsrch->pattern.PCE = NULL;
464 return initializePatternCETable(strsrch, status);
516 * Building of the pattern collation element list and the boyer moore strsrch
539 * @param strsrch UStringSearch data storage
544 inline void initialize(UStringSearch *strsrch, UErrorCode *status)
546 int16_t expandlength = initializePattern(strsrch, status);
547 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) {
548 UPattern *pattern = &strsrch->pattern;
558 strsrch->pattern.defaultShiftSize = 0;
565 * @param strsrch string search data
570 void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/,
574 UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
596 * @param strsrch string search data
601 UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
605 UBreakIterator *breakiterator = strsrch->search->breakIter;
625 UCollationElements *coleiter = strsrch->utilIter;
626 const UChar *text = strsrch->search->text +
630 for (int32_t count = 0; count < strsrch->pattern.CELength;
632 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
637 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) {
643 && getCE(strsrch, nextce) == UCOL_IGNORABLE) {
692 * @param strsrch string search data
699 inline int32_t getNextUStringSearchBaseOffset(UStringSearch *strsrch,
702 int32_t textlength = strsrch->search->textLength;
703 if (strsrch->pattern.hasSuffixAccents &&
706 const UChar *text = strsrch->search->text;
720 * @param strsrch string search data
728 inline int32_t shiftForward(UStringSearch *strsrch,
733 UPattern *pattern = &(strsrch->pattern);
748 textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset);
761 * @param strsrch string search data
764 inline void setMatchNotFound(UStringSearch *strsrch)
767 strsrch->search->matchedIndex = USEARCH_DONE;
768 strsrch->search->matchedLength = 0;
769 if (strsrch->search->isForwardSearching) {
770 setColEIterOffset(strsrch->textIter, strsrch->search->textLength);
773 setColEIterOffset(strsrch->textIter, 0);
818 * @param strsrch string search data
827 UBool checkExtraMatchAccents(const UStringSearch *strsrch, int32_t start,
832 if (strsrch->pattern.hasPrefixAccents) {
835 const UChar *text = strsrch->search->text + start;
840 int32_t safeoffset = getNextSafeOffset(strsrch->collator,
870 UCollationElements *coleiter = strsrch->utilIter;
872 uint32_t firstce = strsrch->pattern.CE[0];
910 * @param strsrch string search data
917 UBool hasAccentsBeforeMatch(const UStringSearch *strsrch, int32_t start,
920 if (strsrch->pattern.hasPrefixAccents) {
921 UCollationElements *coleiter = strsrch->textIter;
925 int32_t firstce = strsrch->pattern.CE[0];
928 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
936 ce = getCE(strsrch, ucol_next(coleiter, &status));
949 // accent = (getFCD(strsrch->search->text, &temp,
950 // strsrch->search->textLength)
955 UBool accent = getFCD(strsrch->search->text, &temp,
956 strsrch->search->textLength) > 0xFF;
958 return checkExtraMatchAccents(strsrch, start, end, &status);
965 U16_BACK_1(strsrch->search->text, 0, temp);
966 if (getFCD(strsrch->search->text, &temp,
967 strsrch->search->textLength) & LAST_BYTE_MASK_) {
991 * @param strsrch string search data
998 UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
1001 if (strsrch->pattern.hasSuffixAccents) {
1002 const UChar *text = strsrch->search->text;
1004 int32_t textlength = strsrch->search->textLength;
1007 int32_t firstce = strsrch->pattern.CE[0];
1008 UCollationElements *coleiter = strsrch->textIter;
1012 while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
1018 while (count < strsrch->pattern.CELength) {
1019 if (getCE(strsrch, ucol_next(coleiter, &status))
1035 ce = getCE(strsrch, ce);
1065 * @param strsrch string search data
1071 inline UBool checkIdentical(const UStringSearch *strsrch, int32_t start,
1074 if (strsrch->strength != UCOL_IDENTICAL) {
1082 strsrch->nfd->normalize(
1083 UnicodeString(FALSE, strsrch->search->text + start, end - start), t2, status);
1084 strsrch->nfd->normalize(
1085 UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
1093 * @param strsrch string search data
1099 inline UBool checkRepeatedMatch(UStringSearch *strsrch,
1103 int32_t lastmatchindex = strsrch->search->matchedIndex;
1108 if (strsrch->search->isForwardSearching) {
1114 if (!result && !strsrch->search->isOverlap) {
1115 if (strsrch->search->isForwardSearching) {
1116 result = start < lastmatchindex + strsrch->search->matchedLength;
1151 * @param strsrch string search data
1159 UBool checkNextExactContractionMatch(UStringSearch *strsrch,
1163 UCollationElements *coleiter = strsrch->textIter;
1164 int32_t textlength = strsrch->search->textLength;
1166 const UCollator *collator = strsrch->collator;
1167 const UChar *text = strsrch->search->text;
1202 int32_t *patternce = strsrch->pattern.CE;
1203 int32_t patterncelength = strsrch->pattern.CELength;
1206 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1216 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1238 * @param strsrch string search data
1246 inline UBool checkNextExactMatch(UStringSearch *strsrch,
1249 UCollationElements *coleiter = strsrch->textIter;
1252 if (!checkNextExactContractionMatch(strsrch, &start, textoffset, status)) {
1257 if (!isBreakUnit(strsrch, start, *textoffset) ||
1258 checkRepeatedMatch(strsrch, start, *textoffset) ||
1259 hasAccentsBeforeMatch(strsrch, start, *textoffset) ||
1260 !checkIdentical(strsrch, start, *textoffset) ||
1261 hasAccentsAfterMatch(strsrch, start, *textoffset)) {
1264 *textoffset = getNextUStringSearchBaseOffset(strsrch, *textoffset);
1269 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
1270 checkBreakBoundary(strsrch, &start, textoffset);
1274 strsrch->search->matchedIndex = start;
1275 strsrch->search->matchedLength = *textoffset - start;
1397 * @param strsrch string search data
1402 inline UBool checkCollationMatch(const UStringSearch *strsrch,
1405 int patternceindex = strsrch->pattern.CELength;
1406 int32_t *patternce = strsrch->pattern.CE;
1409 int32_t ce = getCE(strsrch, ucol_next(coleiter, &status));
1434 * @param strsrch string search match
1442 int32_t doNextCanonicalPrefixMatch(UStringSearch *strsrch,
1447 const UChar *text = strsrch->search->text;
1448 int32_t textlength = strsrch->search->textLength;
1472 UCollationElements *coleiter = strsrch->utilIter;
1474 UChar *rearrange = strsrch->canonicalPrefixAccents;
1492 strsrch->canonicalPrefixAccents,
1493 strsrch->search->text + offset,
1495 strsrch->canonicalSuffixAccents,
1502 if (checkCollationMatch(strsrch, coleiter)) {
1542 * @param strsrch string search data
1548 inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext,
1551 if (safetext != safebuffer && safetext != strsrch->canonicalSuffixAccents)
1566 * @param strsrch string search data
1573 int32_t doNextCanonicalSuffixMatch(UStringSearch *strsrch,
1577 const UChar *text = strsrch->search->text;
1578 const UCollator *collator = strsrch->collator;
1583 UCollationElements *coleiter = strsrch->utilIter;
1586 if (textoffset != 0 && ucol_unsafeCP(strsrch->canonicalSuffixAccents[0],
1593 strsrch->canonicalSuffixAccents,
1597 safetextlength = u_strlen(strsrch->canonicalSuffixAccents);
1598 safetext = strsrch->canonicalSuffixAccents;
1605 int32_t *ce = strsrch->pattern.CE;
1606 int32_t celength = strsrch->pattern.CELength;
1614 cleanUpSafeText(strsrch, safetext, safebuffer);
1620 if (coleiter == strsrch->textIter) {
1621 cleanUpSafeText(strsrch, safetext, safebuffer);
1624 cleanUpSafeText(strsrch, safetext, safebuffer);
1626 coleiter = strsrch->textIter;
1632 textce = getCE(strsrch, textce);
1638 cleanUpSafeText(strsrch, safetext, safebuffer);
1644 cleanUpSafeText(strsrch, safetext, safebuffer);
1648 int32_t result = doNextCanonicalPrefixMatch(strsrch,
1652 setColEIterOffset(strsrch->textIter, result);
1669 cleanUpSafeText(strsrch, safetext, safebuffer);
1676 setColEIterOffset(strsrch->textIter, result);
1677 strsrch->textIter->iteratordata_.toReturn =
1678 setExpansionPrefix(strsrch->textIter, leftoverces);
1699 * @param strsrch string search data
1706 UBool doNextCanonicalMatch(UStringSearch *strsrch,
1710 const UChar *text = strsrch->search->text;
1714 UCollationElements *coleiter = strsrch->textIter;
1716 if (strsrch->pattern.hasPrefixAccents) {
1717 offset = doNextCanonicalPrefixMatch(strsrch, offset, textoffset,
1727 if (!strsrch->pattern.hasSuffixAccents) {
1745 UChar *rearrange = strsrch->canonicalSuffixAccents;
1761 int32_t offset = doNextCanonicalSuffixMatch(strsrch, baseoffset,
1774 * @param strsrch string search data
1780 inline int32_t getPreviousUStringSearchBaseOffset(UStringSearch *strsrch,
1783 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1784 const UChar *text = strsrch->search->text;
1786 if (getFCD(text, &offset, strsrch->search->textLength) >>
1801 * @param strsrch string search data
1808 UBool checkNextCanonicalContractionMatch(UStringSearch *strsrch,
1813 UCollationElements *coleiter = strsrch->textIter;
1814 int32_t textlength = strsrch->search->textLength;
1816 const UCollator *collator = strsrch->collator;
1817 const UChar *text = strsrch->search->text;
1845 int32_t *patternce = strsrch->pattern.CE;
1846 int32_t patterncelength = strsrch->pattern.CELength;
1848 int32_t textlength = strsrch->search->textLength;
1850 int32_t ce = getCE(strsrch, ucol_next(coleiter, status));
1867 ce = getCE(strsrch, ucol_next(coleiter, status));
1871 ce = getCE(strsrch, ucol_next(coleiter, status));
1877 *end = getNextUStringSearchBaseOffset(strsrch, *end);
1898 * @param strsrch string search data
1906 inline UBool checkNextCanonicalMatch(UStringSearch *strsrch,
1911 UCollationElements *coleiter = strsrch->textIter;
1913 if ((strsrch->pattern.hasSuffixAccents &&
1914 strsrch->canonicalSuffixAccents[0]) ||
1915 (strsrch->pattern.hasPrefixAccents &&
1916 strsrch->canonicalPrefixAccents[0])) {
1917 strsrch->search->matchedIndex = getPreviousUStringSearchBaseOffset(
1918 strsrch,
1920 strsrch->search->matchedLength = *textoffset -
1921 strsrch->search->matchedIndex;
1926 if (!checkNextCanonicalContractionMatch(strsrch, &start, textoffset,
1931 start = getPreviousUStringSearchBaseOffset(strsrch, start);
1933 if (checkRepeatedMatch(strsrch, start, *textoffset) ||
1934 !isBreakUnit(strsrch, start, *textoffset) ||
1935 !checkIdentical(strsrch, start, *textoffset)) {
1937 *textoffset = getNextBaseOffset(strsrch->search->text, *textoffset,
1938 strsrch->search->textLength);
1942 strsrch->search->matchedIndex = start;
1943 strsrch->search->matchedLength = *textoffset - start;
1953 * @param strsrch string search data
1961 inline int32_t reverseShift(UStringSearch *strsrch,
1966 if (strsrch->search->isOverlap) {
1967 if (textoffset != strsrch->search->textLength) {
1971 textoffset -= strsrch->pattern.defaultShiftSize;
1976 int32_t shift = strsrch->pattern.backShift[hash(ce)];
1987 textoffset -= strsrch->pattern.defaultShiftSize;
1990 textoffset = getPreviousUStringSearchBaseOffset(strsrch, textoffset);
1999 * @param strsrch string search data
2006 UBool checkPreviousExactContractionMatch(UStringSearch *strsrch,
2010 UCollationElements *coleiter = strsrch->textIter;
2011 int32_t textlength = strsrch->search->textLength;
2013 const UCollator *collator = strsrch->collator;
2014 const UChar *text = strsrch->search->text;
2043 int32_t *patternce = strsrch->pattern.CE;
2044 int32_t patterncelength = strsrch->pattern.CELength;
2047 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2081 * @param strsrch string search data
2092 inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
2097 int32_t end = ucol_getOffset(strsrch->textIter);
2098 if (!checkPreviousExactContractionMatch(strsrch, textoffset, &end, status)
2105 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2106 !isBreakUnit(strsrch, *textoffset, end) ||
2107 hasAccentsBeforeMatch(strsrch, *textoffset, end) ||
2108 !checkIdentical(strsrch, *textoffset, end) ||
2109 hasAccentsAfterMatch(strsrch, *textoffset, end)) {
2111 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2117 if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
2118 checkBreakBoundary(strsrch, textoffset, &end);
2121 strsrch->search->matchedIndex = *textoffset;
2122 strsrch->search->matchedLength = end - *textoffset;
2138 * @param strsrch string search match
2146 int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
2151 const UChar *text = strsrch->search->text;
2155 if (!(getFCD(text, &tempend, strsrch->search->textLength) &
2160 end = getNextBaseOffset(text, end, strsrch->search->textLength);
2174 UCollationElements *coleiter = strsrch->utilIter;
2176 UChar *rearrange = strsrch->canonicalSuffixAccents;
2194 strsrch->canonicalPrefixAccents,
2195 strsrch->search->text + start,
2197 strsrch->canonicalSuffixAccents,
2204 if (checkCollationMatch(strsrch, coleiter)) {
2226 * @param strsrch string search data
2233 int32_t doPreviousCanonicalPrefixMatch(UStringSearch *strsrch,
2237 const UChar *text = strsrch->search->text;
2238 const UCollator *collator = strsrch->collator;
2246 ucol_unsafeCP(strsrch->canonicalPrefixAccents[
2247 u_strlen(strsrch->canonicalPrefixAccents) - 1
2250 strsrch->search->textLength);
2254 strsrch->canonicalPrefixAccents,
2259 safetextlength = u_strlen(strsrch->canonicalPrefixAccents);
2260 safetext = strsrch->canonicalPrefixAccents;
2263 UCollationElements *coleiter = strsrch->utilIter;
2268 int32_t *ce = strsrch->pattern.CE;
2269 int32_t celength = strsrch->pattern.CELength;
2272 int32_t prefixlength = u_strlen(strsrch->canonicalPrefixAccents);
2278 cleanUpSafeText(strsrch, safetext, safebuffer);
2284 if (coleiter == strsrch->textIter) {
2285 cleanUpSafeText(strsrch, safetext, safebuffer);
2288 cleanUpSafeText(strsrch, safetext, safebuffer);
2290 coleiter = strsrch->textIter;
2296 textce = getCE(strsrch, textce);
2302 cleanUpSafeText(strsrch, safetext, safebuffer);
2308 cleanUpSafeText(strsrch, safetext, safebuffer);
2312 int32_t result = doPreviousCanonicalSuffixMatch(strsrch,
2316 setColEIterOffset(strsrch->textIter, result);
2333 cleanUpSafeText(strsrch, safetext, safebuffer);
2340 setColEIterOffset(strsrch->textIter, result);
2341 setExpansionSuffix(strsrch->textIter, leftoverces);
2362 * @param strsrch string search data
2369 UBool doPreviousCanonicalMatch(UStringSearch *strsrch,
2373 const UChar *text = strsrch->search->text;
2375 int32_t textlength = strsrch->search->textLength;
2377 UCollationElements *coleiter = strsrch->textIter;
2379 if (strsrch->pattern.hasSuffixAccents) {
2380 offset = doPreviousCanonicalSuffixMatch(strsrch, textoffset,
2390 if (!strsrch->pattern.hasPrefixAccents) {
2408 UChar *rearrange = strsrch->canonicalPrefixAccents;
2424 int32_t offset = doPreviousCanonicalPrefixMatch(strsrch,
2439 * @param strsrch string search data
2446 UBool checkPreviousCanonicalContractionMatch(UStringSearch *strsrch,
2450 UCollationElements *coleiter = strsrch->textIter;
2451 int32_t textlength = strsrch->search->textLength;
2453 const UCollator *collator = strsrch->collator;
2454 const UChar *text = strsrch->search->text;
2483 int32_t *patternce = strsrch->pattern.CE;
2484 int32_t patterncelength = strsrch->pattern.CELength;
2487 int32_t ce = getCE(strsrch, ucol_previous(coleiter, status));
2505 ce = getCE(strsrch, ucol_previous(coleiter, status));
2509 ce = getCE(strsrch, ucol_previous(coleiter, status));
2536 * @param strsrch string search data
2544 inline UBool checkPreviousCanonicalMatch(UStringSearch *strsrch,
2549 UCollationElements *coleiter = strsrch->textIter;
2551 if ((strsrch->pattern.hasSuffixAccents &&
2552 strsrch->canonicalSuffixAccents[0]) ||
2553 (strsrch->pattern.hasPrefixAccents &&
2554 strsrch->canonicalPrefixAccents[0])) {
2555 strsrch->search->matchedIndex = *textoffset;
2556 strsrch->search->matchedLength =
2557 getNextUStringSearchBaseOffset(strsrch,
2564 if (!checkPreviousCanonicalContractionMatch(strsrch, textoffset, &end,
2570 end = getNextUStringSearchBaseOffset(strsrch, end);
2572 if (checkRepeatedMatch(strsrch, *textoffset, end) ||
2573 !isBreakUnit(strsrch, *textoffset, end) ||
2574 !checkIdentical(strsrch, *textoffset, end)) {
2576 *textoffset = getPreviousBaseOffset(strsrch->search->text,
2581 strsrch->search->matchedIndex = *textoffset;
2582 strsrch->search->matchedLength = end - *textoffset;
2750 U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
2752 if (strsrch) {
2753 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer &&
2754 strsrch->pattern.CE) {
2755 uprv_free(strsrch->pattern.CE);
2758 if (strsrch->pattern.PCE != NULL &&
2759 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
2760 uprv_free(strsrch->pattern.PCE);
2763 delete strsrch->textProcessedIter;
2764 ucol_closeElements(strsrch->textIter);
2765 ucol_closeElements(strsrch->utilIter);
2767 if (strsrch->ownCollator && strsrch->collator) {
2768 ucol_close((UCollator *)strsrch->collator);
2772 if (strsrch->search->internalBreakIter) {
2773 ubrk_close(strsrch->search->internalBreakIter);
2777 uprv_free(strsrch->search);
2778 uprv_free(strsrch);
2784 UBool initTextProcessedIter(UStringSearch *strsrch, UErrorCode *status) {
2786 if (strsrch->textProcessedIter == NULL) {
2787 strsrch->textProcessedIter = new icu::UCollationPCE(strsrch->textIter);
2788 if (strsrch->textProcessedIter == NULL) {
2793 strsrch->textProcessedIter->init(strsrch->textIter);
2802 U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
2806 if (U_SUCCESS(*status) && strsrch) {
2807 if (isOutOfBounds(strsrch->search->textLength, position)) {
2811 setColEIterOffset(strsrch->textIter, position);
2813 strsrch->search->matchedIndex = USEARCH_DONE;
2814 strsrch->search->matchedLength = 0;
2815 strsrch->search->reset = FALSE;
2819 U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch)
2821 if (strsrch) {
2822 int32_t result = ucol_getOffset(strsrch->textIter);
2823 if (isOutOfBounds(strsrch->search->textLength, result)) {
2831 U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
2836 if (U_SUCCESS(*status) && strsrch) {
2840 strsrch->search->isOverlap = (value == USEARCH_ON ? TRUE : FALSE);
2843 strsrch->search->isCanonicalMatch = (value == USEARCH_ON ? TRUE :
2848 strsrch->search->elementComparisonType = (int16_t)value;
2850 strsrch->search->elementComparisonType = 0;
2864 const UStringSearch *strsrch,
2867 if (strsrch) {
2870 return (strsrch->search->isOverlap == TRUE ? USEARCH_ON :
2873 return (strsrch->search->isCanonicalMatch == TRUE ? USEARCH_ON :
2877 int16_t value = strsrch->search->elementComparisonType;
2892 const UStringSearch *strsrch)
2894 if (strsrch == NULL) {
2897 return strsrch->search->matchedIndex;
2901 U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
2909 if (strsrch == NULL || resultCapacity < 0 || (resultCapacity > 0 &&
2915 int32_t copylength = strsrch->search->matchedLength;
2916 int32_t copyindex = strsrch->search->matchedIndex;
2926 uprv_memcpy(result, strsrch->search->text + copyindex,
2930 strsrch->search->matchedLength, status);
2934 const UStringSearch *strsrch)
2936 if (strsrch) {
2937 return strsrch->search->matchedLength;
2944 U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
2948 if (U_SUCCESS(*status) && strsrch) {
2949 strsrch->search->breakIter = breakiter;
2951 ubrk_setText(breakiter, strsrch->search->text,
2952 strsrch->search->textLength, status);
2958 usearch_getBreakIterator(const UStringSearch *strsrch)
2960 if (strsrch) {
2961 return strsrch->search->breakIter;
2968 U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
2974 if (strsrch == NULL || text == NULL || textlength < -1 ||
2982 strsrch->search->text = text;
2983 strsrch->search->textLength = textlength;
2984 ucol_setText(strsrch->textIter, text, textlength, status);
2985 strsrch->search->matchedIndex = USEARCH_DONE;
2986 strsrch->search->matchedLength = 0;
2987 strsrch->search->reset = TRUE;
2989 if (strsrch->search->breakIter != NULL) {
2990 ubrk_setText(strsrch->search->breakIter, text,
2993 ubrk_setText(strsrch->search->internalBreakIter, text, textlength, status);
2999 U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
3002 if (strsrch) {
3003 *length = strsrch->search->textLength;
3004 return strsrch->search->text;
3009 U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
3019 if (strsrch) {
3020 delete strsrch->textProcessedIter;
3021 strsrch->textProcessedIter = NULL;
3022 ucol_closeElements(strsrch->textIter);
3023 ucol_closeElements(strsrch->utilIter);
3024 strsrch->textIter = strsrch->utilIter = NULL;
3025 if (strsrch->ownCollator && (strsrch->collator != collator)) {
3026 ucol_close((UCollator *)strsrch->collator);
3027 strsrch->ownCollator = FALSE;
3029 strsrch->collator = collator;
3030 strsrch->strength = ucol_getStrength(collator);
3031 strsrch->ceMask = getMask(strsrch->strength);
3033 ubrk_close(strsrch->search->internalBreakIter);
3034 strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
3035 strsrch->search->text, strsrch->search->textLength, status);
3038 strsrch->toShift =
3042 strsrch->variableTop = ucol_getVariableTop(collator, status);
3043 strsrch->textIter = ucol_openElements(collator,
3044 strsrch->search->text,
3045 strsrch->search->textLength,
3047 strsrch->utilIter = ucol_openElements(
3048 collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
3050 initialize(strsrch, status);
3057 uprv_init_pce(strsrch->textIter);
3058 uprv_init_pce(strsrch->utilIter);
3063 U_CAPI UCollator * U_EXPORT2 usearch_getCollator(const UStringSearch *strsrch)
3065 if (strsrch) {
3066 return (UCollator *)strsrch->collator;
3071 U_CAPI void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
3077 if (strsrch == NULL || pattern == NULL) {
3088 strsrch->pattern.text = pattern;
3089 strsrch->pattern.textLength = patternlength;
3090 initialize(strsrch, status);
3096 usearch_getPattern(const UStringSearch *strsrch,
3099 if (strsrch) {
3100 *length = strsrch->pattern.textLength;
3101 return strsrch->pattern.text;
3108 U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
3111 if (strsrch && U_SUCCESS(*status)) {
3112 strsrch->search->isForwardSearching = TRUE;
3113 usearch_setOffset(strsrch, 0, status);
3115 return usearch_next(strsrch, status);
3121 U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
3125 if (strsrch && U_SUCCESS(*status)) {
3126 strsrch->search->isForwardSearching = TRUE;
3128 usearch_setOffset(strsrch, position, status);
3130 return usearch_next(strsrch, status);
3136 U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
3139 if (strsrch && U_SUCCESS(*status)) {
3140 strsrch->search->isForwardSearching = FALSE;
3141 usearch_setOffset(strsrch, strsrch->search->textLength, status);
3143 return usearch_previous(strsrch, status);
3149 U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
3153 if (strsrch && U_SUCCESS(*status)) {
3154 strsrch->search->isForwardSearching = FALSE;
3156 usearch_setOffset(strsrch, position, status);
3158 return usearch_previous(strsrch, status);
3186 U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
3189 if (U_SUCCESS(*status) && strsrch) {
3192 int32_t offset = usearch_getOffset(strsrch);
3193 USearch *search = strsrch->search;
3200 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3204 setMatchNotFound(strsrch);
3213 setMatchNotFound(strsrch);
3233 if (strsrch->pattern.CELength == 0) {
3242 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3252 ucol_setOffset(strsrch->textIter, offset + 1, status);
3255 ucol_setOffset(strsrch->textIter,
3269 usearch_handleNextCanonical(strsrch, status);
3272 usearch_handleNextExact(strsrch, status);
3282 ucol_setOffset(strsrch->textIter, search->textLength, status);
3284 ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
3294 U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
3297 if (U_SUCCESS(*status) && strsrch) {
3299 USearch *search = strsrch->search;
3304 setColEIterOffset(strsrch->textIter, offset);
3307 offset = usearch_getOffset(strsrch);
3326 (offset < strsrch->pattern.defaultShiftSize ||
3328 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3330 setMatchNotFound(strsrch);
3337 setMatchNotFound(strsrch);
3344 if (strsrch->pattern.CELength == 0) {
3348 setMatchNotFound(strsrch);
3353 setColEIterOffset(strsrch->textIter, search->matchedIndex);
3359 if (strsrch->search->isCanonicalMatch) {
3361 usearch_handlePreviousCanonical(strsrch, status);
3365 usearch_handlePreviousExact(strsrch, status);
3382 U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch)
3389 if (strsrch) {
3397 strsrch->collator);
3398 if ((strsrch->strength < UCOL_QUATERNARY && newStrength >= UCOL_QUATERNARY) ||
3399 (strsrch->strength >= UCOL_QUATERNARY && newStrength < UCOL_QUATERNARY)) {
3403 strsrch->strength = ucol_getStrength(strsrch->collator);
3404 ceMask = getMask(strsrch->strength);
3405 if (strsrch->ceMask != ceMask) {
3406 strsrch->ceMask = ceMask;
3411 shift = ucol_getAttribute(strsrch->collator, UCOL_ALTERNATE_HANDLING,
3413 if (strsrch->toShift != shift) {
3414 strsrch->toShift = shift;
3419 varTop = ucol_getVariableTop(strsrch->collator, &status);
3420 if (strsrch->variableTop != varTop) {
3421 strsrch->variableTop = varTop;
3425 initialize(strsrch, &status);
3427 ucol_setText(strsrch->textIter, strsrch->search->text,
3428 strsrch->search->textLength,
3430 strsrch->search->matchedLength = 0;
3431 strsrch->search->matchedIndex = USEARCH_DONE;
3432 strsrch->search->isOverlap = FALSE;
3433 strsrch->search->isCanonicalMatch = FALSE;
3434 strsrch->search->elementComparisonType = 0;
3435 strsrch->search->isForwardSearching = TRUE;
3436 strsrch->search->reset = TRUE;
3619 static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
3621 const UChar *text = strsrch->search->text;
3622 int32_t textLen = strsrch->search->textLength;
3658 UBreakIterator *breakiterator = strsrch->search->breakIter;
3661 breakiterator = strsrch->search->internalBreakIter;
3681 static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
3683 const UChar *text = strsrch->search->text;
3684 int32_t textLen = strsrch->search->textLength;
3709 UBreakIterator *breakiterator = strsrch->search->breakIter;
3712 breakiterator = strsrch->search->internalBreakIter;
3723 static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
3726 UBreakIterator *breakiterator = strsrch->search->breakIter;
3810 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
3825 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
3826 printf(" %8x", strsrch->pattern.CE[ii]);
3835 if(strsrch->pattern.CELength == 0 ||
3837 startIdx > strsrch->search->textLength ||
3838 strsrch->pattern.CE == NULL) {
3843 if (strsrch->pattern.PCE == NULL) {
3844 initializePatternPCETable(strsrch, status);
3847 ucol_setOffset(strsrch->textIter, startIdx, status);
3848 CEBuffer ceb(strsrch, status);
3894 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
3895 patCE = strsrch->pattern.PCE[patIx];
3900 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
3915 targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far
3947 if (strsrch->search->elementComparisonType == 0) {
3965 UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
3989 if (!isBreakBoundary(strsrch, mStart)) {
4013 if (minLimit == lastCEI->highIndex && isBreakBoundary(strsrch, minLimit)) {
4016 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4035 if (!isBreakBoundary(strsrch, mLimit)) {
4039 if (! checkIdentical(strsrch, mStart, mLimit)) {
4077 U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
4092 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
4093 printf(" %8x", strsrch->pattern.CE[ii]);
4102 if(strsrch->pattern.CELength == 0 ||
4104 startIdx > strsrch->search->textLength ||
4105 strsrch->pattern.CE == NULL) {
4110 if (strsrch->pattern.PCE == NULL) {
4111 initializePatternPCETable(strsrch, status);
4114 CEBuffer ceb(strsrch, status);
4126 if (startIdx < strsrch->search->textLength) {
4127 UBreakIterator *bi = strsrch->search->internalBreakIter;
4130 ucol_setOffset(strsrch->textIter, next, status);
4138 ucol_setOffset(strsrch->textIter, startIdx, status);
4175 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
4176 int64_t patCE = strsrch->pattern.PCE[patIx];
4178 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset);
4182 UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
4214 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
4223 if (!isBreakBoundary(strsrch, mStart)) {
4256 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4270 if (!isBreakBoundary(strsrch, mLimit)) {
4279 int32_t nba = nextBoundaryAfter(strsrch, minLimit);
4290 if (! checkIdentical(strsrch, mStart, mLimit)) {
4330 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
4333 setMatchNotFound(strsrch);
4338 UCollationElements *coleiter = strsrch->textIter;
4339 int32_t textlength = strsrch->search->textLength;
4340 int32_t *patternce = strsrch->pattern.CE;
4341 int32_t patterncelength = strsrch->pattern.CELength;
4347 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4367 targetce = getCE(strsrch, targetce);
4397 targetce = getCE(strsrch, targetce);
4413 textoffset = shiftForward(strsrch, textoffset, lastce,
4420 if (checkNextExactMatch(strsrch, &textoffset, status)) {
4422 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4426 setMatchNotFound(strsrch);
4429 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4433 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4434 strsrch->search->matchedIndex = start;
4435 strsrch->search->matchedLength = end - start;
4438 setMatchNotFound(strsrch);
4444 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
4447 setMatchNotFound(strsrch);
4452 UCollationElements *coleiter = strsrch->textIter;
4453 int32_t textlength = strsrch->search->textLength;
4454 int32_t *patternce = strsrch->pattern.CE;
4455 int32_t patterncelength = strsrch->pattern.CELength;
4458 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4460 textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER,
4462 strsrch->canonicalPrefixAccents[0] = 0;
4463 strsrch->canonicalSuffixAccents[0] = 0;
4483 targetce = getCE(strsrch, targetce);
4505 targetce = getCE(strsrch, targetce);
4517 strsrch->canonicalPrefixAccents[0] = 0;
4518 strsrch->canonicalSuffixAccents[0] = 0;
4522 found = doNextCanonicalMatch(strsrch, textoffset, status);
4529 textoffset = shiftForward(strsrch, textoffset, lastce,
4536 if (checkNextCanonicalMatch(strsrch, &textoffset, status)) {
4537 setColEIterOffset(coleiter, strsrch->search->matchedIndex);
4541 setMatchNotFound(strsrch);
4544 int32_t textOffset = ucol_getOffset(strsrch->textIter);
4548 if (usearch_search(strsrch, textOffset, &start, &end, status)) {
4549 strsrch->search->matchedIndex = start;
4550 strsrch->search->matchedLength = end - start;
4553 setMatchNotFound(strsrch);
4559 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
4562 setMatchNotFound(strsrch);
4567 UCollationElements *coleiter = strsrch->textIter;
4568 int32_t *patternce = strsrch->pattern.CE;
4569 int32_t patterncelength = strsrch->pattern.CELength;
4575 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4576 textoffset = strsrch->search->matchedIndex;
4579 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4602 targetce = getCE(strsrch, targetce);
4606 if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
4630 targetce = getCE(strsrch, targetce);
4647 textoffset = reverseShift(strsrch, textoffset, targetce,
4653 if (checkPreviousExactMatch(strsrch, &textoffset, status)) {
4658 setMatchNotFound(strsrch);
4663 if (strsrch->search->isOverlap) {
4664 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4665 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4668 initializePatternPCETable(strsrch, status);
4669 if (!initTextProcessedIter(strsrch, status)) {
4670 setMatchNotFound(strsrch);
4673 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
4674 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4681 setMatchNotFound(strsrch);
4684 textOffset = ucol_getOffset(strsrch->textIter);
4687 textOffset = ucol_getOffset(strsrch->textIter);
4693 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4694 strsrch->search->matchedIndex = start;
4695 strsrch->search->matchedLength = end - start;
4698 setMatchNotFound(strsrch);
4704 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
4708 setMatchNotFound(strsrch);
4713 UCollationElements *coleiter = strsrch->textIter;
4714 int32_t *patternce = strsrch->pattern.CE;
4715 int32_t patterncelength = strsrch->pattern.CELength;
4718 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4723 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4724 textoffset = strsrch->search->matchedIndex;
4727 textoffset = reverseShift(strsrch, textoffset, UCOL_NULLORDER,
4729 strsrch->canonicalPrefixAccents[0] = 0;
4730 strsrch->canonicalSuffixAccents[0] = 0;
4750 targetce = getCE(strsrch, targetce);
4776 targetce = getCE(strsrch, targetce);
4788 strsrch->canonicalPrefixAccents[0] = 0;
4789 strsrch->canonicalSuffixAccents[0] = 0;
4793 found = doPreviousCanonicalMatch(strsrch, textoffset, status);
4800 textoffset = reverseShift(strsrch, textoffset, targetce,
4806 if (checkPreviousCanonicalMatch(strsrch, &textoffset, status)) {
4811 setMatchNotFound(strsrch);
4816 if (strsrch->search->isOverlap) {
4817 if (strsrch->search->matchedIndex != USEARCH_DONE) {
4818 textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
4821 initializePatternPCETable(strsrch, status);
4822 if (!initTextProcessedIter(strsrch, status)) {
4823 setMatchNotFound(strsrch);
4826 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
4827 int64_t pce = strsrch->textProcessedIter->nextProcessed(NULL, NULL, status);
4834 setMatchNotFound(strsrch);
4837 textOffset = ucol_getOffset(strsrch->textIter);
4840 textOffset = ucol_getOffset(strsrch->textIter);
4846 if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
4847 strsrch->search->matchedIndex = start;
4848 strsrch->search->matchedLength = end - start;
4851 setMatchNotFound(strsrch);