Cross Reference: /external/pcre/dist2/src/pcre2

Lines Matching refs:utf
179 /* This bit (which is greater than any UTF value) is used to indicate that a
199 UTF-8 mode. */
239 /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
284 in UTF-8 mode. It runs from '0' to 'z'. */
334 /* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
379 string is built from string macros so that it works in UTF-8 mode on EBCDIC
630 /* This is a table of start-of-pattern options such as (*UTF) and settings such
632 compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
859 UTF mode, the result is in code units rather than bytes. The branch is
873   utf         TRUE in UTF mode
882              or -3 if \C was encountered (in UTF mode only)
896 find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb,
952     d = find_fixedlength(cc, utf, atend, cb, recurses, countptr);
1002     d = find_fixedlength(cs, utf, atend, cb, &this_recurse, countptr);
1072     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1086     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1120     /* The single-byte matcher isn't allowed. This only happens in UTF-8 or
1121     UTF-16 mode; otherwise \C is coded as OP_ALLANY. */
1351   utf         TRUE if in UTF mode
1368 could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf,
1462       int rc = could_be_empty_branch(scode, endcode, utf, cb, atend,
1523           int rc = could_be_empty_branch(code, endcode, utf, cb, atend,
1670     /* In UTF-8 or UTF-16 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY,
1704     if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
1721     if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
1827 BOOL utf = (options & PCRE2_UTF) != 0;
1911       if (utf)
2082     to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
2090     if (!utf && c > 0xff) *errorcodeptr = ERR51;
2113         if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
2115         if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
2117         if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
2127         if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
2147     greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex
2174           if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
2188           if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
2244 #ifndef EBCDIC    /* ASCII/UTF-8 coding */
2309 PCRE2 is compiled with support for UTF and Unicode properties. On entry, the
2472 utf         TRUE in UTF mode
2478 find_recurse(PCRE2_SPTR code, BOOL utf)
2535     /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may
2540     if (utf) switch(c)
2602     (void)(utf);  /* Keep compiler happy by referencing function argument */
2782 valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
2848   /* Not UTF mode */
2898   /* Without UTF support, character values are constrained by the bit length,
2993 BOOL utf = (options & PCRE2_UTF) != 0;
3001     (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
3023   utf           TRUE if processing UTF
3031   uint32_t options, BOOL utf, compile_block *cb)
3076           if (utf) FORWARDCHAR(ptr);
3117   if (utf)
3141   /* Not UTF */
3231 BOOL utf = (options & PCRE2_UTF) != 0;
3293         if (utf) FORWARDCHAR(ptr);
3376       if (utf && HAS_EXTRALEN(c))
3439           if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
3888 /* We can fish out the UTF setting once and for all into a BOOL, but we must
3893 BOOL utf = (options & PCRE2_UTF) != 0;
3898 #else  /* No UTF support */
3899 BOOL utf = FALSE;
3904 though it will not be used in non-UTF 8-bit cases. This avoids having to supply
4103         if (utf) FORWARDCHAR(ptr);
4369       if (utf && HAS_EXTRALEN(c))
4434         directly. UCP support is not available unless UTF support is.*/
4746         if (utf)
4752         d = *ptr;  /* Not UTF mode */
4898           /* For caseless UTF mode, check whether this character has more than
4903           if (utf && (options & PCRE2_CASELESS) != 0 &&
5133           if (utf) FORWARDCHAR(ptr);
5200       /* Deal with UTF characters that take up more than one code unit. It's
5206       if (utf && NOT_FIRSTCU(code[-1]))
5210         c = (int)(code - lastchar);               /* Length of UTF character */
5217       /* Handle the case of a single character - either with no UTF support, or
5218       with UTF disabled, or for a single-code-unit UTF character. */
5313         then generate the second opcode. In UTF mode, multi-code-unit
5322           if (utf && (c & UTF_LENGTH) != 0)
5359       if (utf && (c & UTF_LENGTH) != 0)
5680               int rc = could_be_empty_branch(scode, ketcode, utf, cb, FALSE,
5803         if (utf && HAS_EXTRALEN(tempcode[-1]))
5903       greater than the code unit maximum when not in UTF mode. */
5927             utf, cb);
6021                   utf, cb);
7423         /* In non-UTF mode, and for both 32-bit modes, we turn \C into
7432           *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
7439     /* We have a data character whose value is in c. In UTF-8 mode it may have
7449     when the extended flag is set. If we are in a UTF mode, it may be a
7458     if (utf && HAS_EXTRALEN(c))
7463     in mclength. When not in UTF mode, the length is always 1. */
7468     /* For caseless UTF mode, check whether this character has more than one
7472     if (utf && (options & PCRE2_CASELESS) != 0)
8315 BOOL utf;                               /* Set TRUE for UTF mode */
8332 uint32_t skipatstart;                   /* When checking (*UTF) etc */
8539 /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
8549 /* Check UTF. We have the original options in 'options', with that value as
8550 modified by (*UTF) etc in cb->external_options. */
8552 utf = (cb.external_options & PCRE2_UTF) != 0;
8553 if (utf)
8812   for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf);
8814        rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf))
8840         rgroup = PRIV(find_bracket)(search_from, utf, recno);
8879     if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
8902   for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1);
8904        cc = (PCRE2_UCHAR *)PRIV(find_bracket)(cc, utf, -1))
8913       fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL, &count);
8980       if (firstcu < 128 || (!utf && firstcu < 255))
8985       /* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
8986       8-bit UTF mode, codepoints in the range 128-255 are introductory code
8988       check wide characters when UTF (and therefore UCP) is supported. */
9022     if (reqcu < 128 || (!utf && reqcu < 255))
9039   int rc = could_be_empty_branch(codestart, code, utf, &cb, TRUE, NULL, &count);
OpenGrok