Home | History | Annotate | Download | only in dist

Lines Matching refs:utf

150 in UTF-8 mode. */
195 /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
228 string is built from string macros so that it works in UTF-8 mode on EBCDIC
487 "this version of PCRE is compiled without UTF support\0"
501 "invalid UTF-8 string\0"
510 "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
537 "invalid UTF-16 string\0"
541 "invalid UTF-32 string\0"
542 "setting UTF is disabled by the application\0"
580 UTF-8 mode. */
619 /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
976 When UTF-8 is enabled, a positive value greater than 255 may be returned in
999 BOOL utf = (options & PCRE_UTF8) != 0;
1016 #ifndef EBCDIC /* ASCII/UTF-8 coding */
1060 #ifndef EBCDIC /* ASCII/UTF-8 coding */
1070 if (c > (utf ? 0x10ffffU : 0xffU))
1072 if (c > (utf ? 0x10ffffU : 0xffffU))
1074 if (utf && c > 0x10ffffU)
1079 else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1246 to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
1254 if (!utf && c > 0xff) *errorcodeptr = ERR51;
1277 if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1279 if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1281 if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1291 if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1310 #ifndef EBCDIC /* ASCII/UTF-8 coding */
1322 greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
1349 #ifndef EBCDIC /* ASCII/UTF-8 coding */
1358 if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1360 if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1362 if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1374 if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1394 #ifndef EBCDIC /* ASCII/UTF-8 coding */
1418 #ifndef EBCDIC /* ASCII/UTF-8 coding */
1704 utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
1710 or -2 if \C was encountered (in UTF-8 mode only)
1716 find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)
1744 d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);
1778 d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);
1839 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1853 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1887 /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
2043 utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
2050 PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number)
2123 /* In UTF-8 mode, opcodes that are followed by a character may be followed by
2128 if (utf) switch(c)
2162 (void)(utf); /* Keep compiler happy by referencing function argument */
2179 utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
2185 find_recurse(const pcre_uchar *code, BOOL utf)
2240 /* In UTF-8 mode, opcodes that are followed by a character may be followed
2245 if (utf) switch(c)
2307 (void)(utf); /* Keep compiler happy by referencing function argument */
2330 utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2344 BOOL utf, compile_data *cd, recurse_check *recurses)
2416 if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
2472 if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
2617 /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2652 if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2670 if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2711 utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2719 branch_chain *bcptr, BOOL utf, compile_data *cd)
2723 if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
2849 utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2862 get_chr_property_list(const pcre_uchar *code, BOOL utf,
2874 utf = utf; /* Suppress "unused parameter" compiler warning */
2953 if (chr < 128 || (chr < 256 && !utf))
3059 utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
3067 compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
3155 if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
3175 if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
3187 code = get_chr_property_list(code, utf, cd->fcc, list);
3208 /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
3209 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
3214 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
3562 list_ptr[2] + LINK_SIZE, utf)) return FALSE;
3595 utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
3602 auto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
3617 get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3620 if (end != NULL && compare_opcodes(end, utf, cd, list, end))
3672 end = get_chr_property_list(code, utf, cd->fcc, list);
3676 if (compare_opcodes(end, utf, cd, list, end))
3748 /* In UTF-8 mode, opcodes that are followed by a character may be followed by
3753 if (utf) switch(c)
3815 (void)(utf); /* Keep compiler happy by referencing function argument */
3948 utf TRUE in UTF-8 / UTF-16 / UTF-32 mode
3956 adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
3961 while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
4051 /* This function is passed the start and end of a class range, in UTF-8 mode
4119 valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
4181 /* Not UTF-mode, or no UCP */
4241 /* Without UTF support, character values are constrained by the bit length,
4336 BOOL utf = (options & PCRE_UTF8) != 0;
4344 (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
4417 /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
4423 BOOL utf = (options & PCRE_UTF8) != 0;
4428 BOOL utf = FALSE;
4433 though it will not be used in non-UTF 8-bit cases. This avoids having to supply
4626 if (utf) FORWARDCHAR(ptr);
4859 if (utf && HAS_EXTRALEN(c))
5234 if (utf)
5240 d = *ptr; /* Not UTF-8 mode */
5340 /* For caseless UTF-8 mode when UCP support is available, check
5345 if (utf && (options & PCRE_CASELESS) != 0 &&
5359 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
5375 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
5566 if (utf) FORWARDCHAR(p);
5639 /* Deal with UTF characters that take up more than one character. It's
5645 if (utf && NOT_FIRSTCHAR(code[-1]))
5649 c = (int)(code - lastchar); /* Length of UTF-8 character */
5656 /* Handle the case of a single charater - either with no UTF support, or
5657 with UTF disabled, or for a single character UTF character. */
5747 required property. In UTF-8 mode, long characters have their length in
5753 if (utf && (c & UTF_LENGTH) != 0)
5778 if (utf && (c & UTF_LENGTH) != 0)
5808 if (utf && (c & UTF_LENGTH) != 0)
5927 adjust_recurse(previous, 1, utf, cd, save_hwm);
5951 adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
6188 if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
6211 adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
6300 if (utf && HAS_EXTRALEN(tempcode[-1]))
6345 adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
6394 adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
7304 called = PRIV(find_bracket)(cd->start_code, utf, recno);
7340 could_be_empty(called, code, bcptr, utf, cd))
7862 /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
7867 *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
7873 /* We have a data character whose value is in c. In UTF-8 mode it may have
7878 if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
7892 when the extended flag is set. If we are in a UTF mode, it may be a
7901 if (utf && HAS_EXTRALEN(c))
7906 in mclength. When not in UTF-8 mode, the length is always 1. */
7911 /* For caseless UTF-8 mode when UCP support is available, check whether
7916 if (utf && (options & PCRE_CASELESS) != 0)
8793 BOOL utf;
8875 relevant libraries, but (*UTF) is generic and always supported. Note that
8962 utf = (options & PCRE_UTF8) != 0;
8963 if (utf && never_utf)
8969 /* Can't support UTF unless PCRE has been compiled to include the code. The
8975 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
8988 if (utf)
9261 groupptr = PRIV(find_bracket)(codestart, utf, recno);
9291 auto_possessify(temp, utf, cd);
9311 for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1);
9313 cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1))
9382 if (utf)
9424 if (utf)
9496 if (could_be_empty_branch(codestart, code, utf, cd, NULL))