Cross Reference: /external/pcre/dist2/src/pcre2

Lines Matching refs:utf
127 UTF-8 mode, the number of subject bytes matched may be different to the number
128 of reference bytes. (In theory this could also happen in UTF-16 mode, but it
149 BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
169 /* Separate the caseless and UTF cases for speed. */
177   if (utf)
180     code units matched may differ, because in UTF-8 there are some characters
182     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
183     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
211     /* Not in UTF mode */
229 are in UTF mode. */
556 the mb structure (e.g. utf, end_subject) into individual variables to improve
589 register BOOL utf;         /* Local copy of UTF flag for speed */
762 utf = (mb->poptions & PCRE2_UTF) != 0;
764 utf = FALSE;
1703     move back, this match function fails. When working with UTF-8 we move
1709     if (utf)
1721     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
2264       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2269       if (utf)
2317 #endif  /* SUPPORT UTF */
2319       /* Not in UTF-8 mode, but we may still have PCRE2_UCP set, and for
2406     if (utf) ACROSSCHAR(eptr < mb->end_subject, *eptr, eptr++);
2411     /* Match a single code unit, even in UTF-8 mode. This opcode really does
2746         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2931       case. However, in caseless UTF-8 mode there are pairs of case-equivalent
2974     0-127 when UTF-8 processing is enabled. The only difference between
3029       if (utf)
3049       /* Not UTF mode */
3081         if (utf)
3104         /* Not UTF mode */
3137         if (utf)
3169           /* Not UTF mode */
3208     encountered only when UTF-8 mode is supported. In the 16-bit and
3210     UTF is not supported. */
3263         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3287           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3310           if (!PRIV(xclass)(c, data, utf)) break;
3322           if (utf) BACKCHAR(eptr);
3336     if (utf)
3353     /* Not UTF mode */
3376     if (utf)
3420     /* Not UTF mode */
3508     The various UTF/non-UTF and caseful/caseless cases are handled separately,
3513     if (utf)
3587           /* After \C in UTF mode, pp might be in the middle of a Unicode
3602       /* If the length of a UTF-8 character is 1, we fall through here, and
3603       obey the code as for non-UTF-8 characters below, though in this case the
3609       /* When not in UTF-8 mode, load a single-byte character. */
3613     or may not be in UTF mode. The code is duplicated for the caseless and
3624       /* fc must be < 128 if UTF is enabled. */
3628       if (utf && fc > 127)
3763     if (utf)
3884       if (utf && fc > 127)
3891       if (utf)
3907       /* Not UTF mode */
3926         if (utf)
3945         /* Not UTF mode */
3971         if (utf)
3988           /* After \C in UTF mode, pp might be in the middle of a Unicode
4002         /* Not UTF mode */
4032       if (utf)
4048       /* Not UTF mode */
4066         if (utf)
4085         /* Not UTF mode */
4110         if (utf)
4127           /* After \C in UTF mode, pp might be in the middle of a Unicode
4141         /* Not UTF mode */
4226     in UTF-8 mode, '.' matches a character of any length, but for the other
4244     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4245     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4454               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4468 /* Handle all other cases when the coding is UTF-8 */
4471       if (utf) switch(ctype)
4721       /* Code for the non-UTF-8 case for minimum matching of operators other
4972     subsequent match. Again, separate the UTF-8 case for speed, and also
5215               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5229       if (utf)
5354       /* Not UTF mode */
5492     UTF-8 and UCP stuff separate. */
5702         /* After \C in UTF mode, pp might be in the middle of a Unicode
5711           if (utf) BACKCHAR(eptr);
5735               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5750         backtracking because the use of \C in UTF mode can cause BACKCHAR to
5751         move back past pp. This is just palliative; the use of \C in UTF mode
5768           if (!utf) c = *eptr; else
5779             if (!utf) c = *fptr; else
5796       if (utf)
5839             eptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
6026         /* After \C in UTF mode, pp might be in the middle of a Unicode
6042       /* Not UTF mode */
6459 BOOL utf;
6531 /* These two settings are used in the code for checking a UTF string that
6536 utf = (re->overall_options & PCRE2_UTF) != 0;
6540 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
6549 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
6757       if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
6777     if (utf && req_cu > 127) req_cu2 = UCD_OTHERCASE(req_cu);
6814       if (utf)
6857         if (utf)
7039     if (utf)
OpenGrok