Home | History | Annotate | Download | only in src

Lines Matching refs:utf

179 /* This bit (which is greater than any UTF value) is used to indicate that a
199 UTF-8 mode. */
239 /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
284 in UTF-8 mode. It runs from '0' to 'z'. */
334 /* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
379 string is built from string macros so that it works in UTF-8 mode on EBCDIC
630 /* This is a table of start-of-pattern options such as (*UTF) and settings such
632 compatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
859 UTF mode, the result is in code units rather than bytes. The branch is
873 utf TRUE in UTF mode
882 or -3 if \C was encountered (in UTF mode only)
896 find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb,
952 d = find_fixedlength(cc, utf, atend, cb, recurses, countptr);
1002 d = find_fixedlength(cs, utf, atend, cb, &this_recurse, countptr);
1072 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1086 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1120 /* The single-byte matcher isn't allowed. This only happens in UTF-8 or
1121 UTF-16 mode; otherwise \C is coded as OP_ALLANY. */
1351 utf TRUE if in UTF mode
1368 could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf,
1462 int rc = could_be_empty_branch(scode, endcode, utf, cb, atend,
1523 int rc = could_be_empty_branch(code, endcode, utf, cb, atend,
1670 /* In UTF-8 or UTF-16 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY,
1704 if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
1721 if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
1827 BOOL utf = (options & PCRE2_UTF) != 0;
1911 if (utf)
2082 to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
2090 if (!utf && c > 0xff) *errorcodeptr = ERR51;
2113 if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
2115 if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
2117 if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
2127 if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
2147 greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex
2174 if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
2188 if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
2244 #ifndef EBCDIC /* ASCII/UTF-8 coding */
2309 PCRE2 is compiled with support for UTF and Unicode properties. On entry, the
2472 utf TRUE in UTF mode
2478 find_recurse(PCRE2_SPTR code, BOOL utf)
2535 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may
2540 if (utf) switch(c)
2602 (void)(utf); /* Keep compiler happy by referencing function argument */
2782 valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
2848 /* Not UTF mode */
2898 /* Without UTF support, character values are constrained by the bit length,
2993 BOOL utf = (options & PCRE2_UTF) != 0;
3001 (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
3023 utf TRUE if processing UTF
3031 uint32_t options, BOOL utf, compile_block *cb)
3076 if (utf) FORWARDCHAR(ptr);
3117 if (utf)
3141 /* Not UTF */
3231 BOOL utf = (options & PCRE2_UTF) != 0;
3293 if (utf) FORWARDCHAR(ptr);
3376 if (utf && HAS_EXTRALEN(c))
3439 if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
3888 /* We can fish out the UTF setting once and for all into a BOOL, but we must
3893 BOOL utf = (options & PCRE2_UTF) != 0;
3898 #else /* No UTF support */
3899 BOOL utf = FALSE;
3904 though it will not be used in non-UTF 8-bit cases. This avoids having to supply
4103 if (utf) FORWARDCHAR(ptr);
4369 if (utf && HAS_EXTRALEN(c))
4434 directly. UCP support is not available unless UTF support is.*/
4746 if (utf)
4752 d = *ptr; /* Not UTF mode */
4898 /* For caseless UTF mode, check whether this character has more than
4903 if (utf && (options & PCRE2_CASELESS) != 0 &&
5133 if (utf) FORWARDCHAR(ptr);
5200 /* Deal with UTF characters that take up more than one code unit. It's
5206 if (utf && NOT_FIRSTCU(code[-1]))
5210 c = (int)(code - lastchar); /* Length of UTF character */
5217 /* Handle the case of a single character - either with no UTF support, or
5218 with UTF disabled, or for a single-code-unit UTF character. */
5313 then generate the second opcode. In UTF mode, multi-code-unit
5322 if (utf && (c & UTF_LENGTH) != 0)
5359 if (utf && (c & UTF_LENGTH) != 0)
5680 int rc = could_be_empty_branch(scode, ketcode, utf, cb, FALSE,
5803 if (utf && HAS_EXTRALEN(tempcode[-1]))
5903 greater than the code unit maximum when not in UTF mode. */
5927 utf, cb);
6021 utf, cb);
7423 /* In non-UTF mode, and for both 32-bit modes, we turn \C into
7432 *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
7439 /* We have a data character whose value is in c. In UTF-8 mode it may have
7449 when the extended flag is set. If we are in a UTF mode, it may be a
7458 if (utf && HAS_EXTRALEN(c))
7463 in mclength. When not in UTF mode, the length is always 1. */
7468 /* For caseless UTF mode, check whether this character has more than one
7472 if (utf && (options & PCRE2_CASELESS) != 0)
8315 BOOL utf; /* Set TRUE for UTF mode */
8332 uint32_t skipatstart; /* When checking (*UTF) etc */
8539 /* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
8549 /* Check UTF. We have the original options in 'options', with that value as
8550 modified by (*UTF) etc in cb->external_options. */
8552 utf = (cb.external_options & PCRE2_UTF) != 0;
8553 if (utf)
8812 for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf);
8814 rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf))
8840 rgroup = PRIV(find_bracket)(search_from, utf, recno);
8879 if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
8902 for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1);
8904 cc = (PCRE2_UCHAR *)PRIV(find_bracket)(cc, utf, -1))
8913 fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL, &count);
8980 if (firstcu < 128 || (!utf && firstcu < 255))
8985 /* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
8986 8-bit UTF mode, codepoints in the range 128-255 are introductory code
8988 check wide characters when UTF (and therefore UCP) is supported. */
9022 if (reqcu < 128 || (!utf && reqcu < 255))
9039 int rc = could_be_empty_branch(codestart, code, utf, &cb, TRUE, NULL, &count);