Home | History | Annotate | Download | only in src

Lines Matching refs:utf

127 UTF-8 mode, the number of subject bytes matched may be different to the number
128 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
149 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
169 /* Separate the caseless and UTF cases for speed. */
177 if (utf)
180 code units matched may differ, because in UTF-8 there are some characters
182 For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
183 (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
211 /* Not in UTF mode */
229 are in UTF mode. */
556 the mb structure (e.g. utf, end_subject) into individual variables to improve
589 register BOOL utf; /* Local copy of UTF flag for speed */
762 utf = (mb->poptions & PCRE2_UTF) != 0;
764 utf = FALSE;
1703 move back, this match function fails. When working with UTF-8 we move
1709 if (utf)
1721 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
2264 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2269 if (utf)
2317 #endif /* SUPPORT UTF */
2319 /* Not in UTF-8 mode, but we may still have PCRE2_UCP set, and for
2406 if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
2411 /* Match a single code unit, even in UTF-8 mode. This opcode really does
2746 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2931 case. However, in caseless UTF-8 mode there are pairs of case-equivalent
2974 0-127 when UTF-8 processing is enabled. The only difference between
3029 if (utf)
3049 /* Not UTF mode */
3081 if (utf)
3104 /* Not UTF mode */
3137 if (utf)
3169 /* Not UTF mode */
3208 encountered only when UTF-8 mode is supported. In the 16-bit and
3210 UTF is not supported. */
3263 if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3287 if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3310 if (!PRIV(xclass)(c, data, utf)) break;
3322 if (utf) BACKCHAR(eptr);
3336 if (utf)
3353 /* Not UTF mode */
3376 if (utf)
3420 /* Not UTF mode */
3508 The various UTF/non-UTF and caseful/caseless cases are handled separately,
3513 if (utf)
3587 /* After \C in UTF mode, pp might be in the middle of a Unicode
3602 /* If the length of a UTF-8 character is 1, we fall through here, and
3603 obey the code as for non-UTF-8 characters below, though in this case the
3609 /* When not in UTF-8 mode, load a single-byte character. */
3613 or may not be in UTF mode. The code is duplicated for the caseless and
3624 /* fc must be < 128 if UTF is enabled. */
3628 if (utf && fc > 127)
3763 if (utf)
3884 if (utf && fc > 127)
3891 if (utf)
3907 /* Not UTF mode */
3926 if (utf)
3945 /* Not UTF mode */
3971 if (utf)
3988 /* After \C in UTF mode, pp might be in the middle of a Unicode
4002 /* Not UTF mode */
4032 if (utf)
4048 /* Not UTF mode */
4066 if (utf)
4085 /* Not UTF mode */
4110 if (utf)
4127 /* After \C in UTF mode, pp might be in the middle of a Unicode
4141 /* Not UTF mode */
4226 in UTF-8 mode, '.' matches a character of any length, but for the other
4244 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4245 is tidier. Also separate the UCP code, which can be the same for both UTF-8
4454 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4468 /* Handle all other cases when the coding is UTF-8 */
4471 if (utf) switch(ctype)
4721 /* Code for the non-UTF-8 case for minimum matching of operators other
4972 subsequent match. Again, separate the UTF-8 case for speed, and also
5215 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5229 if (utf)
5354 /* Not UTF mode */
5492 UTF-8 and UCP stuff separate. */
5702 /* After \C in UTF mode, pp might be in the middle of a Unicode
5711 if (utf) BACKCHAR(eptr);
5735 if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5750 backtracking because the use of \C in UTF mode can cause BACKCHAR to
5751 move back past pp. This is just palliative; the use of \C in UTF mode
5768 if (!utf) c = *eptr; else
5779 if (!utf) c = *fptr; else
5796 if (utf)
5839 eptr = mb->end_subject; /* Unlimited UTF-8 repeat */
6026 /* After \C in UTF mode, pp might be in the middle of a Unicode
6042 /* Not UTF mode */
6459 BOOL utf;
6531 /* These two settings are used in the code for checking a UTF string that
6536 utf = (re->overall_options & PCRE2_UTF) != 0;
6540 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
6549 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
6757 if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
6777 if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
6814 if (utf)
6857 if (utf)
7039 if (utf)