Home | History | Annotate | Download | only in src

Lines Matching defs:utf

68 string of that length that matches. In UTF mode, the result is in characters
78 utf UTF flag
83 -1 \C in UTF-8 mode
93 PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses, int *countptr)
169 d = find_minlength(re, cc, startcode, utf, recurses, countptr);
189 prev_cap_d = find_minlength(re, cc, startcode, utf, recurses, countptr);
291 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
303 may need to skip over a multibyte character in UTF mode. */
312 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
354 /* The single-byte matcher means we can't proceed in UTF mode. (In
355 non-UTF mode \C will actually be turned into OP_ALLANY, so won't ever
360 if (utf) return -1;
463 ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
487 dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
506 ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
527 d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
603 prev_recurse_d = find_minlength(re, cs, startcode, utf, &this_recurse,
617 of a character, we must take special action for UTF-8 characters. As it
664 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
712 utf TRUE for UTF mode
718 set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
721 (void)utf; /* Stop compiler warning when UTF not supported */
732 /* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
736 if (utf)
750 if (utf)
763 /* Not UTF */
777 /* This function sets starting bits for a character type. In UTF-8 mode, we can
779 confusion with bytes in the middle of UTF-8 characters. In a "traditional"
782 So we deal with that case by considering the UTF-8 encoding.
787 table_limit 32 for non-UTF-8; 16 for UTF-8
809 #endif /* UTF-8 */
818 In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
819 otherwise there can be confusion with bytes in the middle of UTF-8 characters.
821 specific high-valued UTF-8 characters, in this case we have to set the bits for
828 table_limit 32 for non-UTF-8; 16 for UTF-8
853 the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
865 utf TRUE if in UTF mode
874 set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
880 int table_limit = utf? 16:32;
994 if (utf)
1030 rc = set_start_bits(re, tcode, utf);
1081 rc = set_start_bits(re, ++tcode, utf);
1103 tcode = set_table_bit(re, tcode + 1, FALSE, utf);
1112 tcode = set_table_bit(re, tcode + 1, TRUE, utf);
1120 tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf);
1126 tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf);
1138 (void)set_table_bit(re, tcode + 1, FALSE, utf);
1149 (void)set_table_bit(re, tcode + 1, TRUE, utf);
1164 the bits for 0xA0 and for code units >= 255, independently of UTF. */
1170 /* For the 8-bit library in UTF-8 mode, set the bits for the first code
1174 if (utf)
1183 /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
1203 the bits for NEL and for code units >= 255, independently of UTF. */
1209 /* For the 8-bit library in UTF-8 mode, set the bits for the first code
1213 if (utf)
1220 /* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
1303 the bits for 0xA0 and for code units >= 255, independently of UTF. */
1309 /* For the 8-bit library in UTF-8 mode, set the bits for the first code
1313 if (utf)
1322 /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
1340 the bits for NEL and for code units >= 255, independently of UTF. */
1346 /* For the 8-bit library in UTF-8 mode, set the bits for the first code
1350 if (utf)
1357 /* For the 8-bit library not in UTF-8 mode, set the bit for NEL. */
1412 in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
1413 because it starts a character with a value > 255. In 8-bit non-UTF mode,
1419 if (utf)
1440 /* When wide characters are supported, classmap may be NULL. In UTF-8
1450 if (utf)
1465 /* In all modes except UTF-8, the two bit maps are compatible. */
1530 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
1544 int rc = set_start_bits(re, code, utf);
1554 switch(min = find_minlength(re, code, code, utf, NULL, &count))
1556 case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */