Home | History | Annotate | Download | only in i18n

Lines Matching refs:pattern

280 * Initializing the ce table for a pattern.
282 * Table size will be estimated by the size of the pattern text. Table
295 UPattern *pattern = &(strsrch->pattern);
297 int32_t *cetable = pattern->cesBuffer;
298 uint32_t patternlength = pattern->textLength;
302 coleiter = ucol_openElements(strsrch->collator, pattern->text,
310 ucol_setText(coleiter, pattern->text, pattern->textLength, status);
316 if (pattern->ces != cetable && pattern->ces) {
317 uprv_free(pattern->ces);
336 if (cetable != temp && cetable != pattern->cesBuffer) {
345 pattern->ces = cetable;
346 pattern->cesLength = offset;
352 * Initializing the pce table for a pattern.
354 * Table size will be estimated by the size of the pattern text. Table
367 UPattern *pattern = &(strsrch->pattern);
369 int64_t *pcetable = pattern->pcesBuffer;
370 uint32_t patternlength = pattern->textLength;
374 coleiter = ucol_openElements(strsrch->collator, pattern->text,
381 ucol_setText(coleiter, pattern->text, pattern->textLength, status);
387 if (pattern->pces != pcetable && pattern->pces != NULL) {
388 uprv_free(pattern->pces);
413 if (pcetable != temp && pcetable != pattern->pcesBuffer) {
422 pattern->pces = pcetable;
423 pattern->pcesLength = offset;
429 * Initializes the pattern struct.
434 * @return expansionsize the total expansion size of the pattern
440 UPattern *pattern = &(strsrch->pattern);
441 const UChar *patterntext = pattern->text;
442 int32_t length = pattern->textLength;
445 // Since the strength is primary, accents are ignored in the pattern.
447 pattern->hasPrefixAccents = 0;
448 pattern->hasSuffixAccents = 0;
450 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
454 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
459 if (strsrch->pattern.pces != NULL) {
460 if (strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
461 uprv_free(strsrch->pattern.pces);
464 strsrch->pattern.pces = NULL;
476 * @param cetable table containing pattern ce
477 * @param cesize size of the pattern ces
520 * Building of the pattern collation element list and the boyer moore strsrch
525 * a number of characters in the text and tries to match the pattern from that
529 * possible representation of the pattern. Anyways, we'll err on the smaller
532 * pattern into 3 parts, the prefix accents (PA), the middle string bounded by
541 * If the pattern has no non-ignorable ce, we return an illegal argument error.
551 if (U_SUCCESS(*status) && strsrch->pattern.cesLength > 0) {
552 UPattern *pattern = &strsrch->pattern;
553 int32_t cesize = pattern->cesLength;
557 pattern->defaultShiftSize = minlength;
558 setShiftTable(pattern->shift, pattern->backShift, pattern->ces,
562 strsrch->pattern.defaultShiftSize = 0;
634 for (int32_t count = 0; count < strsrch->pattern.cesLength;
641 if (U_FAILURE(status) || ce != strsrch->pattern.ces[count]) {
694 * Gets the next base character offset depending on the string search pattern
707 if (strsrch->pattern.hasSuffixAccents &&
727 * @param patternceindex index of the ce within the pattern ce buffer which
737 UPattern *pattern = &(strsrch->pattern);
739 int32_t shift = pattern->shift[hashFromCE32(ce)];
742 int32_t adjust = pattern->cesLength - patternceindex;
749 textoffset += pattern->defaultShiftSize;
836 if (strsrch->pattern.hasPrefixAccents) {
876 uint32_t firstce = strsrch->pattern.ces[0];
904 * the first pattern ce does not match the first ce of the character, we bail.
924 if (strsrch->pattern.hasPrefixAccents) {
929 int32_t firstce = strsrch->pattern.ces[0];
1005 if (strsrch->pattern.hasSuffixAccents) {
1011 int32_t firstce = strsrch->pattern.ces[0];
1022 while (count < strsrch->pattern.cesLength) {
1089 UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
1206 int32_t *patternce = strsrch->pattern.ces;
1207 int32_t patterncelength = strsrch->pattern.cesLength;
1400 * pattern in string search data
1409 int patternceindex = strsrch->pattern.cesLength;
1410 int32_t *patternce = strsrch->pattern.ces;
1430 * match with the pattern.
1435 * step 2: check if any of the generated substrings matches the pattern.
1609 int32_t *ce = strsrch->pattern.ces;
1610 int32_t celength = strsrch->pattern.cesLength;
1692 * equivalents and check their corresponding ces with the pattern ce.
1695 * match with the pattern.
1700 * step 2: check if any of the generated substrings matches the pattern.
1720 if (strsrch->pattern.hasPrefixAccents) {
1731 if (!strsrch->pattern.hasSuffixAccents) {
1777 * pattern data
1787 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1849 int32_t *patternce = strsrch->pattern.ces;
1850 int32_t patterncelength = strsrch->pattern.cesLength;
1867 // pure accent pattern is matched without rearrangement
1917 if ((strsrch->pattern.hasSuffixAccents &&
1919 (strsrch->pattern.hasPrefixAccents &&
1960 * @param patternceindex index of the ce within the pattern ce buffer which
1975 textoffset -= strsrch->pattern.defaultShiftSize;
1980 int32_t shift = strsrch->pattern.backShift[hashFromCE32(ce)];
1991 textoffset -= strsrch->pattern.defaultShiftSize;
2047 int32_t *patternce = strsrch->pattern.ces;
2048 int32_t patterncelength = strsrch->pattern.cesLength;
2134 * match with the pattern.
2139 * step 2: check if any of the generated substrings matches the pattern.
2272 int32_t *ce = strsrch->pattern.ces;
2273 int32_t celength = strsrch->pattern.cesLength;
2355 * canonical equivalents and check their corresponding ces with the pattern ce.
2358 * match with the pattern.
2363 * step 2: check if any of the generated substrings matches the pattern.
2383 if (strsrch->pattern.hasSuffixAccents) {
2394 if (!strsrch->pattern.hasPrefixAccents) {
2487 int32_t *patternce = strsrch->pattern.ces;
2488 int32_t patterncelength = strsrch->pattern.cesLength;
2505 // pure accent pattern is matched without rearrangement
2555 if ((strsrch->pattern.hasSuffixAccents &&
2557 (strsrch->pattern.hasPrefixAccents &&
2593 U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
2613 // pattern, text checks are done in usearch_openFromCollator
2614 UStringSearch *result = usearch_openFromCollator(pattern,
2634 const UChar *pattern,
2651 if (pattern == NULL || text == NULL || collator == NULL) {
2673 patternlength = u_strlen(pattern);
2711 result->pattern.text = pattern;
2712 result->pattern.textLength = patternlength;
2713 result->pattern.ces = NULL;
2714 result->pattern.pces = NULL;
2757 if (strsrch->pattern.ces != strsrch->pattern.cesBuffer &&
2758 strsrch->pattern.ces) {
2759 uprv_free(strsrch->pattern.ces);
2762 if (strsrch->pattern.pces != NULL &&
2763 strsrch->pattern.pces != strsrch->pattern.pcesBuffer) {
2764 uprv_free(strsrch->pattern.pces);
3052 collator, strsrch->pattern.text, strsrch->pattern.textLength, status);
3076 const UChar *pattern,
3081 if (strsrch == NULL || pattern == NULL) {
3086 patternlength = u_strlen(pattern);
3092 strsrch->pattern.text = pattern;
3093 strsrch->pattern.textLength = patternlength;
3104 *length = strsrch->pattern.textLength;
3105 return strsrch->pattern.text;
3179 * be 2 matches within the same normalization is when the pattern consists
3204 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3237 if (strsrch->pattern.cesLength == 0) {
3330 (offset < strsrch->pattern.defaultShiftSize ||
3332 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3338 // Could check pattern length, but the
3348 if (strsrch->pattern.cesLength == 0) {
3488 bufSize = ss->pattern.pcesLength + CEBUFFER_EXTRA;
3490 const UChar * patText = ss->pattern.text;
3492 const UChar * patTextLimit = patText + ss->pattern.textLength;
3850 printf("Pattern CEs\n");
3851 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
3852 printf(" %8x", strsrch->pattern.ces[ii]);
3861 if(strsrch->pattern.cesLength == 0 ||
3864 strsrch->pattern.ces == NULL) {
3869 if (strsrch->pattern.pces == NULL) {
3920 for (patIx=0; patIx<strsrch->pattern.pcesLength; patIx++) {
3921 patCE = strsrch->pattern.pces[patIx];
3923 // Compare CE from target string with CE from the pattern.
3941 targetIxOffset += strsrch->pattern.pcesLength; // this is now the offset in target CE space to end of the match so far
3988 // it is part of the last target element matched by the pattern;
4010 // This can happen if the pattern itself begins with a combining char, and
4053 // settings (such as space) and which extend beyond the pattern match.
4150 printf("Pattern CEs\n");
4151 for (int ii=0; ii<strsrch->pattern.cesLength; ii++) {
4152 printf(" %8x", strsrch->pattern.ces[ii]);
4161 if(strsrch->pattern.cesLength == 0 ||
4164 strsrch->pattern.ces == NULL) {
4169 if (strsrch->pattern.pces == NULL) {
4216 // But patIx is 0 at the beginning of the pattern and increases toward the end.
4217 // So this loop performs a comparison starting with the end of the pattern, and proceeds toward the beginning of the pattern
4234 for (patIx = strsrch->pattern.pcesLength - 1; patIx >= 0; patIx -= 1) {
4235 int64_t patCE = strsrch->pattern.pces[patIx];
4237 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 - patIx + targetIxOffset);
4238 // Compare CE from target string with CE from the pattern.
4273 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.pcesLength - 1 + targetIxOffset);
4277 // This can happen if the pattern itself begins with a combining char, and
4336 // settings (such as space) and which extend beyond the pattern match.
4431 int32_t *patternce = strsrch->pattern.ces;
4432 int32_t patterncelength = strsrch->pattern.cesLength;
4450 // finding the last pattern ce match, imagine composite characters
4451 // for example: search for pattern A in text \u00C0
4461 // normalization and pattern \u0300, where \u0315 is ignorable
4545 int32_t *patternce = strsrch->pattern.ces;
4546 int32_t patterncelength = strsrch->pattern.cesLength;
4549 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4566 // finding the last pattern ce match, imagine composite characters
4567 // for example: search for pattern A in text \u00C0
4659 int32_t *patternce = strsrch->pattern.ces;
4660 int32_t patterncelength = strsrch->pattern.cesLength;
4684 // finding the first pattern ce match, imagine composite
4685 // characters. for example: search for pattern \u0300 in text
4764 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {
4805 int32_t *patternce = strsrch->pattern.ces;
4806 int32_t patterncelength = strsrch->pattern.cesLength;
4809 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4832 // finding the first pattern ce match, imagine composite
4833 // characters. for example: search for pattern \u0300 in text
4917 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.pcesLength - 1; nPCEs++) {