Home | History | Annotate | Download | only in i18n

Lines Matching refs:pattern

284 * Initializing the ce table for a pattern.
286 * Table size will be estimated by the size of the pattern text. Table
299 UPattern *pattern = &(strsrch->pattern);
301 int32_t *cetable = pattern->CEBuffer;
302 uint32_t patternlength = pattern->textLength;
306 coleiter = ucol_openElements(strsrch->collator, pattern->text,
314 uprv_init_collIterate(strsrch->collator, pattern->text,
315 pattern->textLength,
323 if (pattern->CE != cetable && pattern->CE) {
324 uprv_free(pattern->CE);
343 if (cetable != temp && cetable != pattern->CEBuffer) {
352 pattern->CE = cetable;
353 pattern->CELength = offset;
359 * Initializing the pce table for a pattern.
361 * Table size will be estimated by the size of the pattern text. Table
374 UPattern *pattern = &(strsrch->pattern);
376 int64_t *pcetable = pattern->PCEBuffer;
377 uint32_t patternlength = pattern->textLength;
381 coleiter = ucol_openElements(strsrch->collator, pattern->text,
388 uprv_init_collIterate(strsrch->collator, pattern->text,
389 pattern->textLength,
397 if (pattern->PCE != pcetable && pattern->PCE != NULL) {
398 uprv_free(pattern->PCE);
423 if (pcetable != temp && pcetable != pattern->PCEBuffer) {
432 pattern->PCE = pcetable;
433 pattern->PCELength = offset;
439 * Initializes the pattern struct.
444 * @return expansionsize the total expansion size of the pattern
449 UPattern *pattern = &(strsrch->pattern);
450 const UChar *patterntext = pattern->text;
451 int32_t length = pattern->textLength;
454 // Since the strength is primary, accents are ignored in the pattern.
456 pattern->hasPrefixAccents = 0;
457 pattern->hasSuffixAccents = 0;
459 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
463 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
468 if (strsrch->pattern.PCE != NULL) {
469 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
470 uprv_free(strsrch->pattern.PCE);
473 strsrch->pattern.PCE = NULL;
485 * @param cetable table containing pattern ce
486 * @param cesize size of the pattern ces
529 * Building of the pattern collation element list and the boyer moore strsrch
534 * a number of characters in the text and tries to match the pattern from that
538 * possible representation of the pattern. Anyways, we'll err on the smaller
541 * pattern into 3 parts, the prefix accents (PA), the middle string bounded by
550 * If pattern has no non-ignorable ce, we return an illegal argument error.
560 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) {
561 UPattern *pattern = &strsrch->pattern;
562 int32_t cesize = pattern->CELength;
566 pattern->defaultShiftSize = minlength;
567 setShiftTable(pattern->shift, pattern->backShift, pattern->CE,
571 strsrch->pattern.defaultShiftSize = 0;
643 for (int32_t count = 0; count < strsrch->pattern.CELength;
650 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) {
703 * Gets the next base character offset depending on the string search pattern
716 if (strsrch->pattern.hasSuffixAccents &&
736 * @param patternceindex index of the ce within the pattern ce buffer which
746 UPattern *pattern = &(strsrch->pattern);
748 int32_t shift = pattern->shift[hash(ce)];
751 int32_t adjust = pattern->CELength - patternceindex;
758 textoffset += pattern->defaultShiftSize;
845 if (strsrch->pattern.hasPrefixAccents) {
885 uint32_t firstce = strsrch->pattern.CE[0];
913 * the first pattern ce does not match the first ce of the character, we bail.
933 if (strsrch->pattern.hasPrefixAccents) {
938 int32_t firstce = strsrch->pattern.CE[0];
1014 if (strsrch->pattern.hasSuffixAccents) {
1020 int32_t firstce = strsrch->pattern.CE[0];
1031 while (count < strsrch->pattern.CELength) {
1098 UnicodeString(FALSE, strsrch->pattern.text, strsrch->pattern.textLength), p2, status);
1215 int32_t *patternce = strsrch->pattern.CE;
1216 int32_t patterncelength = strsrch->pattern.CELength;
1409 * pattern in string search data
1418 int patternceindex = strsrch->pattern.CELength;
1419 int32_t *patternce = strsrch->pattern.CE;
1439 * match with the pattern.
1444 * step 2: check if any of the generated substrings matches the pattern.
1618 int32_t *ce = strsrch->pattern.CE;
1619 int32_t celength = strsrch->pattern.CELength;
1701 * equivalents and check their corresponding ces with the pattern ce.
1704 * match with the pattern.
1709 * step 2: check if any of the generated substrings matches the pattern.
1729 if (strsrch->pattern.hasPrefixAccents) {
1740 if (!strsrch->pattern.hasSuffixAccents) {
1786 * pattern data
1796 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1858 int32_t *patternce = strsrch->pattern.CE;
1859 int32_t patterncelength = strsrch->pattern.CELength;
1876 // pure accent pattern is matched without rearrangement
1926 if ((strsrch->pattern.hasSuffixAccents &&
1928 (strsrch->pattern.hasPrefixAccents &&
1969 * @param patternceindex index of the ce within the pattern ce buffer which
1984 textoffset -= strsrch->pattern.defaultShiftSize;
1989 int32_t shift = strsrch->pattern.backShift[hash(ce)];
2000 textoffset -= strsrch->pattern.defaultShiftSize;
2056 int32_t *patternce = strsrch->pattern.CE;
2057 int32_t patterncelength = strsrch->pattern.CELength;
2143 * match with the pattern.
2148 * step 2: check if any of the generated substrings matches the pattern.
2281 int32_t *ce = strsrch->pattern.CE;
2282 int32_t celength = strsrch->pattern.CELength;
2364 * canonical equivalents and check their corresponding ces with the pattern ce.
2367 * match with the pattern.
2372 * step 2: check if any of the generated substrings matches the pattern.
2392 if (strsrch->pattern.hasSuffixAccents) {
2403 if (!strsrch->pattern.hasPrefixAccents) {
2496 int32_t *patternce = strsrch->pattern.CE;
2497 int32_t patterncelength = strsrch->pattern.CELength;
2514 // pure accent pattern is matched without rearrangement
2564 if ((strsrch->pattern.hasSuffixAccents &&
2566 (strsrch->pattern.hasPrefixAccents &&
2602 U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
2622 // pattern, text checks are done in usearch_openFromCollator
2623 UStringSearch *result = usearch_openFromCollator(pattern,
2643 const UChar *pattern,
2660 if (pattern == NULL || text == NULL || collator == NULL) {
2682 patternlength = u_strlen(pattern);
2720 result->pattern.text = pattern;
2721 result->pattern.textLength = patternlength;
2722 result->pattern.CE = NULL;
2723 result->pattern.PCE = NULL;
2765 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer &&
2766 strsrch->pattern.CE) {
2767 uprv_free(strsrch->pattern.CE);
2770 if (strsrch->pattern.PCE != NULL &&
2771 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
2772 uprv_free(strsrch->pattern.PCE);
3064 const UChar *pattern,
3069 if (strsrch == NULL || pattern == NULL) {
3074 patternlength = u_strlen(pattern);
3080 strsrch->pattern.text = pattern;
3081 strsrch->pattern.textLength = patternlength;
3092 *length = strsrch->pattern.textLength;
3093 return strsrch->pattern.text;
3167 * be 2 matches within the same normalization is when the pattern consists
3192 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3225 if (strsrch->pattern.CELength == 0) {
3318 (offset < strsrch->pattern.defaultShiftSize ||
3320 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3326 // Could check pattern length, but the
3336 if (strsrch->pattern.CELength == 0) {
3479 bufSize = ss->pattern.PCELength + CEBUFFER_EXTRA;
3481 const UChar * patText = ss->pattern.text;
3483 const UChar * patTextLimit = patText + ss->pattern.textLength;
3817 printf("Pattern CEs\n");
3818 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
3819 printf(" %8x", strsrch->pattern.CE[ii]);
3828 if(strsrch->pattern.CELength == 0 ||
3831 strsrch->pattern.CE == NULL) {
3836 if (strsrch->pattern.PCE == NULL) {
3887 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
3888 patCE = strsrch->pattern.PCE[patIx];
3890 // Compare CE from target string with CE from the pattern.
3908 targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far
3955 // it is part of the last target element matched by the pattern;
3977 // This can happen if the pattern itself begins with a combining char, and
4084 printf("Pattern CEs\n");
4085 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
4086 printf(" %8x", strsrch->pattern.CE[ii]);
4095 if(strsrch->pattern.CELength == 0 ||
4098 strsrch->pattern.CE == NULL) {
4103 if (strsrch->pattern.PCE == NULL) {
4150 // But patIx is 0 at the beginning of the pattern and increases toward the end.
4151 // So this loop performs a comparison starting with the end of pattern, and proceeds toward the beginning of the pattern
4168 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
4169 int64_t patCE = strsrch->pattern.PCE[patIx];
4171 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset);
4172 // Compare CE from target string with CE from the pattern.
4207 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
4211 // This can happen if the pattern itself begins with a combining char, and
4333 int32_t *patternce = strsrch->pattern.CE;
4334 int32_t patterncelength = strsrch->pattern.CELength;
4352 // finding the last pattern ce match, imagine composite characters
4353 // for example: search for pattern A in text \u00C0
4363 // normalization and pattern \u0300, where \u0315 is ignorable
4447 int32_t *patternce = strsrch->pattern.CE;
4448 int32_t patterncelength = strsrch->pattern.CELength;
4451 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4468 // finding the last pattern ce match, imagine composite characters
4469 // for example: search for pattern A in text \u00C0
4561 int32_t *patternce = strsrch->pattern.CE;
4562 int32_t patterncelength = strsrch->pattern.CELength;
4586 // finding the first pattern ce match, imagine composite
4587 // characters. for example: search for pattern \u0300 in text
4662 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
4703 int32_t *patternce = strsrch->pattern.CE;
4704 int32_t patterncelength = strsrch->pattern.CELength;
4707 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4730 // finding the first pattern ce match, imagine composite
4731 // characters. for example: search for pattern \u0300 in text
4811 for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {