Cross Reference: /external/pcre/dist/pcre

Lines Matching refs:utf
150 in UTF-8 mode. */
195 /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
228 string is built from string macros so that it works in UTF-8 mode on EBCDIC
487   "this version of PCRE is compiled without UTF support\0"
501   "invalid UTF-8 string\0"
510   "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
537   "invalid UTF-16 string\0"
541   "invalid UTF-32 string\0"
542   "setting UTF is disabled by the application\0"
580 UTF-8 mode. */
619 /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
976 When UTF-8 is enabled, a positive value greater than 255 may be returned in
999 BOOL utf = (options & PCRE_UTF8) != 0;
1016 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1060 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1070         if (c > (utf ? 0x10ffffU : 0xffU))
1072         if (c > (utf ? 0x10ffffU : 0xffffU))
1074         if (utf && c > 0x10ffffU)
1079         else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1246     to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
1254     if (!utf && c > 0xff) *errorcodeptr = ERR51;
1277         if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1279         if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1281         if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1291         if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1310 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1322     greater than 0xff in utf or non-8bit mode, but only if the ddd are hex
1349 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1358           if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
1360           if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
1362           if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
1374           if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1394 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1418 #ifndef EBCDIC    /* ASCII/UTF-8 coding */
1704   utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
1710              or -2 if \C was encountered (in UTF-8 mode only)
1716 find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)
1744     d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);
1778     d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);
1839     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1853     if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1887     /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode;
2043   utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
2050 PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number)
2123   /* In UTF-8 mode, opcodes that are followed by a character may be followed by
2128     if (utf) switch(c)
2162     (void)(utf);  /* Keep compiler happy by referencing function argument */
2179   utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
2185 find_recurse(const pcre_uchar *code, BOOL utf)
2240     /* In UTF-8 mode, opcodes that are followed by a character may be followed
2245     if (utf) switch(c)
2307     (void)(utf);  /* Keep compiler happy by referencing function argument */
2330   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2344   BOOL utf, compile_data *cd, recurse_check *recurses)
2416       if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
2472         if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
2617     /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2652     if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2670     if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2711   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2719   branch_chain *bcptr, BOOL utf, compile_data *cd)
2723   if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
2849   utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2862 get_chr_property_list(const pcre_uchar *code, BOOL utf,
2874 utf = utf;  /* Suppress "unused parameter" compiler warning */
2953   if (chr < 128 || (chr < 256 && !utf))
3059   utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
3067 compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
3155       if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
3175     if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
3187   code = get_chr_property_list(code, utf, cd->fcc, list);
3208       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
3209       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
3214     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
3562           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
3595   utf         TRUE in UTF-8 / UTF-16 / UTF-32 mode
3602 auto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
3617       get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3620     if (end != NULL && compare_opcodes(end, utf, cd, list, end))
3672       end = get_chr_property_list(code, utf, cd->fcc, list);
3676       if (compare_opcodes(end, utf, cd, list, end))
3748   /* In UTF-8 mode, opcodes that are followed by a character may be followed by
3753   if (utf) switch(c)
3815   (void)(utf);  /* Keep compiler happy by referencing function argument */
3948   utf        TRUE in UTF-8 / UTF-16 / UTF-32 mode
3956 adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
3961 while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
4051 /* This function is passed the start and end of a class range, in UTF-8 mode
4119 valid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
4181   /* Not UTF-mode, or no UCP */
4241   /* Without UTF support, character values are constrained by the bit length,
4336 BOOL utf = (options & PCRE_UTF8) != 0;
4344     (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
4417 /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
4423 BOOL utf = (options & PCRE_UTF8) != 0;
4428 BOOL utf = FALSE;
4433 though it will not be used in non-UTF 8-bit cases. This avoids having to supply
4626         if (utf) FORWARDCHAR(ptr);
4859       if (utf && HAS_EXTRALEN(c))
5234         if (utf)
5240         d = *ptr;  /* Not UTF-8 mode */
5340           /* For caseless UTF-8 mode when UCP support is available, check
5345           if (utf && (options & PCRE_CASELESS) != 0 &&
5359             if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
5375         if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
5566           if (utf) FORWARDCHAR(p);
5639       /* Deal with UTF characters that take up more than one character. It's
5645       if (utf && NOT_FIRSTCHAR(code[-1]))
5649         c = (int)(code - lastchar);     /* Length of UTF-8 character */
5656       /* Handle the case of a single charater - either with no UTF support, or
5657       with UTF disabled, or for a single character UTF character. */
5747         required property. In UTF-8 mode, long characters have their length in
5753           if (utf && (c & UTF_LENGTH) != 0)
5778           if (utf && (c & UTF_LENGTH) != 0)
5808       if (utf && (c & UTF_LENGTH) != 0)
5927           adjust_recurse(previous, 1, utf, cd, save_hwm);
5951           adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
6188               if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
6211               adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
6300         if (utf && HAS_EXTRALEN(tempcode[-1]))
6345           adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
6394         adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
7304               called = PRIV(find_bracket)(cd->start_code, utf, recno);
7340                      could_be_empty(called, code, bcptr, utf, cd))
7862         /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE
7867           *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
7873     /* We have a data character whose value is in c. In UTF-8 mode it may have
7878     if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
7892     when the extended flag is set. If we are in a UTF mode, it may be a
7901     if (utf && HAS_EXTRALEN(c))
7906     in mclength. When not in UTF-8 mode, the length is always 1. */
7911     /* For caseless UTF-8 mode when UCP support is available, check whether
7916     if (utf && (options & PCRE_CASELESS) != 0)
8793 BOOL utf;
8875 relevant libraries, but (*UTF) is generic and always supported. Note that
8962 utf = (options & PCRE_UTF8) != 0;
8963 if (utf && never_utf)
8969 /* Can't support UTF unless PCRE has been compiled to include the code. The
8975 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
8988 if (utf)
9261       groupptr = PRIV(find_bracket)(codestart, utf, recno);
9291   auto_possessify(temp, utf, cd);
9311   for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1);
9313        cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1))
9382         if (utf)
9424     if (utf)
9496   if (could_be_empty_branch(codestart, code, utf, cd, NULL))
OpenGrok