Lines Matching refs:pattern
282 * Initializing the ce table for a pattern.
284 * Table size will be estimated by the size of the pattern text. Table
297 UPattern *pattern = &(strsrch->pattern);
299 int32_t *cetable = pattern->CEBuffer;
300 uint32_t patternlength = pattern->textLength;
304 coleiter = ucol_openElements(strsrch->collator, pattern->text,
312 uprv_init_collIterate(strsrch->collator, pattern->text,
313 pattern->textLength,
317 if (pattern->CE != cetable && pattern->CE) {
318 uprv_free(pattern->CE);
337 if (cetable != temp && cetable != pattern->CEBuffer) {
346 pattern->CE = cetable;
347 pattern->CELength = offset;
353 * Initializing the pce table for a pattern.
355 * Table size will be estimated by the size of the pattern text. Table
368 UPattern *pattern = &(strsrch->pattern);
370 int64_t *pcetable = pattern->PCEBuffer;
371 uint32_t patternlength = pattern->textLength;
375 coleiter = ucol_openElements(strsrch->collator, pattern->text,
382 uprv_init_collIterate(strsrch->collator, pattern->text,
383 pattern->textLength,
387 if (pattern->PCE != pcetable && pattern->PCE != NULL) {
388 uprv_free(pattern->PCE);
413 if (pcetable != temp && pcetable != pattern->PCEBuffer) {
422 pattern->PCE = pcetable;
423 pattern->PCELength = offset;
429 * Initializes the pattern struct.
434 * @return expansionsize the total expansion size of the pattern
439 UPattern *pattern = &(strsrch->pattern);
440 const UChar *patterntext = pattern->text;
441 int32_t length = pattern->textLength;
444 // Since the strength is primary, accents are ignored in the pattern.
446 pattern->hasPrefixAccents = 0;
447 pattern->hasSuffixAccents = 0;
449 pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
453 pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
458 if (strsrch->pattern.PCE != NULL) {
459 if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
460 uprv_free(strsrch->pattern.PCE);
463 strsrch->pattern.PCE = NULL;
475 * @param cetable table containing pattern ce
476 * @param cesize size of the pattern ces
519 * Building of the pattern collation element list and the boyer moore strsrch
524 * a number of characters in the text and tries to match the pattern from that
528 * possible representation of the pattern. Anyways, we'll err on the smaller
531 * pattern into 3 parts, the prefix accents (PA), the middle string bounded by
540 * If pattern has no non-ignorable ce, we return an illegal argument error.
550 if (U_SUCCESS(*status) && strsrch->pattern.CELength > 0) {
551 UPattern *pattern = &strsrch->pattern;
552 int32_t cesize = pattern->CELength;
556 pattern->defaultShiftSize = minlength;
557 setShiftTable(pattern->shift, pattern->backShift, pattern->CE,
561 strsrch->pattern.defaultShiftSize = 0;
633 for (int32_t count = 0; count < strsrch->pattern.CELength;
640 if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) {
693 * Gets the next base character offset depending on the string search pattern
706 if (strsrch->pattern.hasSuffixAccents &&
726 * @param patternceindex index of the ce within the pattern ce buffer which
736 UPattern *pattern = &(strsrch->pattern);
738 int32_t shift = pattern->shift[hash(ce)];
741 int32_t adjust = pattern->CELength - patternceindex;
748 textoffset += pattern->defaultShiftSize;
835 if (strsrch->pattern.hasPrefixAccents) {
875 uint32_t firstce = strsrch->pattern.CE[0];
903 * the first pattern ce does not match the first ce of the character, we bail.
923 if (strsrch->pattern.hasPrefixAccents) {
928 int32_t firstce = strsrch->pattern.CE[0];
1004 if (strsrch->pattern.hasSuffixAccents) {
1010 int32_t firstce = strsrch->pattern.CE[0];
1021 while (count < strsrch->pattern.CELength) {
1089 strsrch->pattern.text,
1090 strsrch->pattern.textLength,
1096 UChar *text, *pattern;
1099 pattern = p2;
1108 pattern = text + decomplength;
1111 unorm_decompose(pattern, decomplength, strsrch->pattern.text,
1112 strsrch->pattern.textLength, FALSE, 0, &status);
1116 text = pattern = t2;
1119 UBool result = (UBool)(u_memcmp(pattern, text, decomplength) == 0);
1239 int32_t *patternce = strsrch->pattern.CE;
1240 int32_t patterncelength = strsrch->pattern.CELength;
1433 * pattern in string search data
1442 int patternceindex = strsrch->pattern.CELength;
1443 int32_t *patternce = strsrch->pattern.CE;
1463 * match with the pattern.
1468 * step 2: check if any of the generated substrings matches the pattern.
1642 int32_t *ce = strsrch->pattern.CE;
1643 int32_t celength = strsrch->pattern.CELength;
1725 * equivalents and check their corresponding ces with the pattern ce.
1728 * match with the pattern.
1733 * step 2: check if any of the generated substrings matches the pattern.
1753 if (strsrch->pattern.hasPrefixAccents) {
1764 if (!strsrch->pattern.hasSuffixAccents) {
1810 * pattern data
1820 if (strsrch->pattern.hasPrefixAccents && textoffset > 0) {
1882 int32_t *patternce = strsrch->pattern.CE;
1883 int32_t patterncelength = strsrch->pattern.CELength;
1900 // pure accent pattern is matched without rearrangement
1950 if ((strsrch->pattern.hasSuffixAccents &&
1952 (strsrch->pattern.hasPrefixAccents &&
1993 * @param patternceindex index of the ce within the pattern ce buffer which
2008 textoffset -= strsrch->pattern.defaultShiftSize;
2013 int32_t shift = strsrch->pattern.backShift[hash(ce)];
2024 textoffset -= strsrch->pattern.defaultShiftSize;
2080 int32_t *patternce = strsrch->pattern.CE;
2081 int32_t patterncelength = strsrch->pattern.CELength;
2167 * match with the pattern.
2172 * step 2: check if any of the generated substrings matches the pattern.
2305 int32_t *ce = strsrch->pattern.CE;
2306 int32_t celength = strsrch->pattern.CELength;
2388 * canonical equivalents and check their corresponding ces with the pattern ce.
2391 * match with the pattern.
2396 * step 2: check if any of the generated substrings matches the pattern.
2416 if (strsrch->pattern.hasSuffixAccents) {
2427 if (!strsrch->pattern.hasPrefixAccents) {
2520 int32_t *patternce = strsrch->pattern.CE;
2521 int32_t patterncelength = strsrch->pattern.CELength;
2538 // pure accent pattern is matched without rearrangement
2588 if ((strsrch->pattern.hasSuffixAccents &&
2590 (strsrch->pattern.hasPrefixAccents &&
2626 U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
2646 // pattern, text checks are done in usearch_openFromCollator
2647 UStringSearch *result = usearch_openFromCollator(pattern,
2667 const UChar *pattern,
2684 if (pattern == NULL || text == NULL || collator == NULL) {
2706 patternlength = u_strlen(pattern);
2742 result->pattern.text = pattern;
2743 result->pattern.textLength = patternlength;
2744 result->pattern.CE = NULL;
2745 result->pattern.PCE = NULL;
2786 if (strsrch->pattern.CE != strsrch->pattern.CEBuffer &&
2787 strsrch->pattern.CE) {
2788 uprv_free(strsrch->pattern.CE);
2791 if (strsrch->pattern.PCE != NULL &&
2792 strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
2793 uprv_free(strsrch->pattern.PCE);
3068 const UChar *pattern,
3073 if (strsrch == NULL || pattern == NULL) {
3078 patternlength = u_strlen(pattern);
3084 strsrch->pattern.text = pattern;
3085 strsrch->pattern.textLength = patternlength;
3096 *length = strsrch->pattern.textLength;
3097 return strsrch->pattern.text;
3171 * be 2 matches within the same normalization is when the pattern consists
3196 (offset + strsrch->pattern.defaultShiftSize > textlength ||
3229 if (strsrch->pattern.CELength == 0) {
3322 (offset < strsrch->pattern.defaultShiftSize ||
3324 matchedindex < strsrch->pattern.defaultShiftSize)))) {
3330 // Could check pattern length, but the
3340 if (strsrch->pattern.CELength == 0) {
3483 bufSize = ss->pattern.CELength+10;
3744 printf("Pattern CEs\n");
3745 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
3746 printf(" %8x", strsrch->pattern.CE[ii]);
3755 if(strsrch->pattern.CELength == 0 ||
3758 strsrch->pattern.CE == NULL) {
3763 if (strsrch->pattern.PCE == NULL) {
3790 for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
3791 int64_t patCE = strsrch->pattern.PCE[patIx];
3793 // Compare CE from target string with CE from the pattern.
3819 const CEI *lastCEI = ceb.get(targetIx + strsrch->pattern.PCELength - 1);
3820 const CEI *nextCEI = ceb.get(targetIx + strsrch->pattern.PCELength);
3840 // This can happen if the pattern itself begins with a combining char, and
3939 printf("Pattern CEs\n");
3940 for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
3941 printf(" %8x", strsrch->pattern.CE[ii]);
3950 if(strsrch->pattern.CELength == 0 ||
3953 strsrch->pattern.CE == NULL) {
3958 if (strsrch->pattern.PCE == NULL) {
4009 for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
4010 int64_t patCE = strsrch->pattern.PCE[patIx];
4012 targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx);
4013 // Compare CE from target string with CE from the pattern.
4038 const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1);
4062 // This can happen if the pattern itself begins with a combining char, and
4158 int32_t *patternce = strsrch->pattern.CE;
4159 int32_t patterncelength = strsrch->pattern.CELength;
4177 // finding the last pattern ce match, imagine composite characters
4178 // for example: search for pattern A in text \u00C0
4188 // normalization and pattern \u0300, where \u0315 is ignorable
4270 int32_t *patternce = strsrch->pattern.CE;
4271 int32_t patterncelength = strsrch->pattern.CELength;
4274 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4291 // finding the last pattern ce match, imagine composite characters
4292 // for example: search for pattern A in text \u00C0
4382 int32_t *patternce = strsrch->pattern.CE;
4383 int32_t patterncelength = strsrch->pattern.CELength;
4407 // finding the first pattern ce match, imagine composite
4408 // characters. for example: search for pattern \u0300 in text
4498 int32_t *patternce = strsrch->pattern.CE;
4499 int32_t patterncelength = strsrch->pattern.CELength;
4502 strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents;
4525 // finding the first pattern ce match, imagine composite
4526 // characters. for example: search for pattern \u0300 in text